Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 
     21 import com.android.inputmethod.keyboard.KeyDetector;
     22 import com.android.inputmethod.keyboard.Keyboard;
     23 import com.android.inputmethod.keyboard.ProximityInfo;
     24 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
     25 
     26 import java.util.ArrayList;
     27 import java.util.LinkedList;
     28 
     29 /**
     30  * Base class for an in-memory dictionary that can grow dynamically and can
     31  * be searched for suggestions and valid words.
     32  */
     33 public class ExpandableDictionary extends Dictionary {
     34 
     35     // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
     36     protected static final int BIGRAM_MAX_FREQUENCY = 255;
     37 
     38     private Context mContext;
     39     private char[] mWordBuilder = new char[BinaryDictionary.MAX_WORD_LENGTH];
     40     private int mDicTypeId;
     41     private int mMaxDepth;
     42     private int mInputLength;
     43 
     44     private boolean mRequiresReload;
     45 
     46     private boolean mUpdatingDictionary;
     47 
     48     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     49     private Object mUpdatingLock = new Object();
     50 
     51     private static class Node {
     52         Node() {}
     53         char mCode;
     54         int mFrequency;
     55         boolean mTerminal;
     56         Node mParent;
     57         NodeArray mChildren;
     58         ArrayList<char[]> mShortcutTargets;
     59         boolean mShortcutOnly;
     60         LinkedList<NextWord> mNGrams; // Supports ngram
     61     }
     62 
     63     private static class NodeArray {
     64         Node[] mData;
     65         int mLength = 0;
     66         private static final int INCREMENT = 2;
     67 
     68         NodeArray() {
     69             mData = new Node[INCREMENT];
     70         }
     71 
     72         void add(Node n) {
     73             if (mLength + 1 > mData.length) {
     74                 Node[] tempData = new Node[mLength + INCREMENT];
     75                 if (mLength > 0) {
     76                     System.arraycopy(mData, 0, tempData, 0, mLength);
     77                 }
     78                 mData = tempData;
     79             }
     80             mData[mLength++] = n;
     81         }
     82     }
     83 
     84     protected interface NextWord {
     85         public Node getWordNode();
     86         public int getFrequency();
     87         public ForgettingCurveParams getFcParams();
     88         public int notifyTypedAgainAndGetFrequency();
     89     }
     90 
     91     private static class NextStaticWord implements NextWord {
     92         public final Node mWord;
     93         private final int mFrequency;
     94         public NextStaticWord(Node word, int frequency) {
     95             mWord = word;
     96             mFrequency = frequency;
     97         }
     98 
     99         @Override
    100         public Node getWordNode() {
    101             return mWord;
    102         }
    103 
    104         @Override
    105         public int getFrequency() {
    106             return mFrequency;
    107         }
    108 
    109         @Override
    110         public ForgettingCurveParams getFcParams() {
    111             return null;
    112         }
    113 
    114         @Override
    115         public int notifyTypedAgainAndGetFrequency() {
    116             return mFrequency;
    117         }
    118     }
    119 
    120     private static class NextHistoryWord implements NextWord {
    121         public final Node mWord;
    122         public final ForgettingCurveParams mFcp;
    123 
    124         public NextHistoryWord(Node word, ForgettingCurveParams fcp) {
    125             mWord = word;
    126             mFcp = fcp;
    127         }
    128 
    129         @Override
    130         public Node getWordNode() {
    131             return mWord;
    132         }
    133 
    134         @Override
    135         public int getFrequency() {
    136             return mFcp.getFrequency();
    137         }
    138 
    139         @Override
    140         public ForgettingCurveParams getFcParams() {
    141             return mFcp;
    142         }
    143 
    144         @Override
    145         public int notifyTypedAgainAndGetFrequency() {
    146             return mFcp.notifyTypedAgainAndGetFrequency();
    147         }
    148     }
    149 
    150     private NodeArray mRoots;
    151 
    152     private int[][] mCodes;
    153 
    154     public ExpandableDictionary(Context context, int dicTypeId) {
    155         mContext = context;
    156         clearDictionary();
    157         mCodes = new int[BinaryDictionary.MAX_WORD_LENGTH][];
    158         mDicTypeId = dicTypeId;
    159     }
    160 
    161     public void loadDictionary() {
    162         synchronized (mUpdatingLock) {
    163             startDictionaryLoadingTaskLocked();
    164         }
    165     }
    166 
    167     public void startDictionaryLoadingTaskLocked() {
    168         if (!mUpdatingDictionary) {
    169             mUpdatingDictionary = true;
    170             mRequiresReload = false;
    171             new LoadDictionaryTask().start();
    172         }
    173     }
    174 
    175     public void setRequiresReload(boolean reload) {
    176         synchronized (mUpdatingLock) {
    177             mRequiresReload = reload;
    178         }
    179     }
    180 
    181     public boolean getRequiresReload() {
    182         return mRequiresReload;
    183     }
    184 
    185     /** Override to load your dictionary here, on a background thread. */
    186     public void loadDictionaryAsync() {
    187         // empty base implementation
    188     }
    189 
    190     public Context getContext() {
    191         return mContext;
    192     }
    193 
    194     public int getMaxWordLength() {
    195         return BinaryDictionary.MAX_WORD_LENGTH;
    196     }
    197 
    198     public void addWord(final String word, final String shortcutTarget, final int frequency) {
    199         if (word.length() >= BinaryDictionary.MAX_WORD_LENGTH) {
    200             return;
    201         }
    202         addWordRec(mRoots, word, 0, shortcutTarget, frequency, null);
    203     }
    204 
    205     private void addWordRec(NodeArray children, final String word, final int depth,
    206             final String shortcutTarget, final int frequency, Node parentNode) {
    207         final int wordLength = word.length();
    208         if (wordLength <= depth) return;
    209         final char c = word.charAt(depth);
    210         // Does children have the current character?
    211         final int childrenLength = children.mLength;
    212         Node childNode = null;
    213         for (int i = 0; i < childrenLength; i++) {
    214             final Node node = children.mData[i];
    215             if (node.mCode == c) {
    216                 childNode = node;
    217                 break;
    218             }
    219         }
    220         final boolean isShortcutOnly = (null != shortcutTarget);
    221         if (childNode == null) {
    222             childNode = new Node();
    223             childNode.mCode = c;
    224             childNode.mParent = parentNode;
    225             childNode.mShortcutOnly = isShortcutOnly;
    226             children.add(childNode);
    227         }
    228         if (wordLength == depth + 1 && shortcutTarget != null) {
    229             // Terminate this word
    230             childNode.mTerminal = true;
    231             if (isShortcutOnly) {
    232                 if (null == childNode.mShortcutTargets) {
    233                     childNode.mShortcutTargets = new ArrayList<char[]>();
    234                 }
    235                 childNode.mShortcutTargets.add(shortcutTarget.toCharArray());
    236             } else {
    237                 childNode.mShortcutOnly = false;
    238             }
    239             childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
    240             if (childNode.mFrequency > 255) childNode.mFrequency = 255;
    241             return;
    242         }
    243         if (childNode.mChildren == null) {
    244             childNode.mChildren = new NodeArray();
    245         }
    246         addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode);
    247     }
    248 
    249     @Override
    250     public void getWords(final WordComposer codes, final CharSequence prevWordForBigrams,
    251             final WordCallback callback, final ProximityInfo proximityInfo) {
    252         synchronized (mUpdatingLock) {
    253             // If we need to update, start off a background task
    254             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    255             // Currently updating contacts, don't return any results.
    256             if (mUpdatingDictionary) return;
    257         }
    258         if (codes.size() >= BinaryDictionary.MAX_WORD_LENGTH) {
    259             return;
    260         }
    261         getWordsInner(codes, prevWordForBigrams, callback, proximityInfo);
    262     }
    263 
    264     protected final void getWordsInner(final WordComposer codes,
    265             final CharSequence prevWordForBigrams, final WordCallback callback,
    266             final ProximityInfo proximityInfo) {
    267         mInputLength = codes.size();
    268         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
    269         final int[] xCoordinates = codes.getXCoordinates();
    270         final int[] yCoordinates = codes.getYCoordinates();
    271         // Cache the codes so that we don't have to lookup an array list
    272         for (int i = 0; i < mInputLength; i++) {
    273             // TODO: Calculate proximity info here.
    274             if (mCodes[i] == null || mCodes[i].length < 1) {
    275                 mCodes[i] = new int[ProximityInfo.MAX_PROXIMITY_CHARS_SIZE];
    276             }
    277             final int x = xCoordinates != null && i < xCoordinates.length ?
    278                     xCoordinates[i] : WordComposer.NOT_A_COORDINATE;
    279             final int y = xCoordinates != null && i < yCoordinates.length ?
    280                     yCoordinates[i] : WordComposer.NOT_A_COORDINATE;
    281             proximityInfo.fillArrayWithNearestKeyCodes(x, y, codes.getCodeAt(i), mCodes[i]);
    282         }
    283         mMaxDepth = mInputLength * 3;
    284         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
    285         for (int i = 0; i < mInputLength; i++) {
    286             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
    287         }
    288     }
    289 
    290     @Override
    291     public synchronized boolean isValidWord(CharSequence word) {
    292         synchronized (mUpdatingLock) {
    293             // If we need to update, start off a background task
    294             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    295             if (mUpdatingDictionary) return false;
    296         }
    297         final Node node = searchNode(mRoots, word, 0, word.length());
    298         // If node is null, we didn't find the word, so it's not valid.
    299         // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word,
    300         // so that means it's not a valid word.
    301         // If node.mShortcutOnly is false, then it exists as a word (it may also exist as
    302         // a shortcut, but this does not matter), so it's a valid word.
    303         return (node == null) ? false : !node.mShortcutOnly;
    304     }
    305 
    306     protected boolean removeBigram(String word1, String word2) {
    307         // Refer to addOrSetBigram() about word1.toLowerCase()
    308         final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    309         final Node secondWord = searchWord(mRoots, word2, 0, null);
    310         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    311         NextWord bigramNode = null;
    312         if (bigrams == null || bigrams.size() == 0) {
    313             return false;
    314         } else {
    315             for (NextWord nw : bigrams) {
    316                 if (nw.getWordNode() == secondWord) {
    317                     bigramNode = nw;
    318                     break;
    319                 }
    320             }
    321         }
    322         if (bigramNode == null) {
    323             return false;
    324         }
    325         return bigrams.remove(bigramNode);
    326     }
    327 
    328     /**
    329      * Returns the word's frequency or -1 if not found
    330      */
    331     protected int getWordFrequency(CharSequence word) {
    332         // Case-sensitive search
    333         final Node node = searchNode(mRoots, word, 0, word.length());
    334         return (node == null) ? -1 : node.mFrequency;
    335     }
    336 
    337     protected NextWord getBigramWord(String word1, String word2) {
    338         // Refer to addOrSetBigram() about word1.toLowerCase()
    339         final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    340         final Node secondWord = searchWord(mRoots, word2, 0, null);
    341         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    342         if (bigrams == null || bigrams.size() == 0) {
    343             return null;
    344         } else {
    345             for (NextWord nw : bigrams) {
    346                 if (nw.getWordNode() == secondWord) {
    347                     return nw;
    348                 }
    349             }
    350         }
    351         return null;
    352     }
    353 
    354     private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
    355         // The computation itself makes sense for >= 2, but the == 2 case returns 0
    356         // anyway so we may as well test against 3 instead and return the constant
    357         if (inputLength >= 3) {
    358             return (freq * snr * (inputLength - 2)) / (inputLength - 1);
    359         } else {
    360             return 0;
    361         }
    362     }
    363 
    364     /**
    365      * Helper method to add a word and its shortcuts.
    366      *
    367      * @param node the terminal node
    368      * @param word the word to insert, as an array of code points
    369      * @param depth the depth of the node in the tree
    370      * @param finalFreq the frequency for this word
    371      * @return whether there is still space for more words.
    372      * @see Dictionary.WordCallback#addWord(char[], int, int, int, int, int)
    373      */
    374     private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth,
    375             final int finalFreq, final WordCallback callback) {
    376         if (finalFreq > 0 && !node.mShortcutOnly) {
    377             if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, Dictionary.UNIGRAM)) {
    378                 return false;
    379             }
    380         }
    381         if (null != node.mShortcutTargets) {
    382             final int length = node.mShortcutTargets.size();
    383             for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) {
    384                 final char[] shortcut = node.mShortcutTargets.get(shortcutIndex);
    385                 if (!callback.addWord(shortcut, 0, shortcut.length, finalFreq, mDicTypeId,
    386                         Dictionary.UNIGRAM)) {
    387                     return false;
    388                 }
    389             }
    390         }
    391         return true;
    392     }
    393 
    394     /**
    395      * Recursively traverse the tree for words that match the input. Input consists of
    396      * a list of arrays. Each item in the list is one input character position. An input
    397      * character is actually an array of multiple possible candidates. This function is not
    398      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
    399      * size.
    400      * @param roots node whose children have to be search for matches
    401      * @param codes the input character codes
    402      * @param word the word being composed as a possible match
    403      * @param depth the depth of traversal - the length of the word being composed thus far
    404      * @param completion whether the traversal is now in completion mode - meaning that we've
    405      * exhausted the input and we're looking for all possible suffixes.
    406      * @param snr current weight of the word being formed
    407      * @param inputIndex position in the input characters. This can be off from the depth in
    408      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
    409      * "wouldve", it could be matching "would've", so the depth will be one more than the
    410      * inputIndex
    411      * @param callback the callback class for adding a word
    412      */
    413     // TODO: Share this routine with the native code for BinaryDictionary
    414     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
    415             final int depth, final boolean completion, int snr, int inputIndex, int skipPos,
    416             WordCallback callback) {
    417         final int count = roots.mLength;
    418         final int codeSize = mInputLength;
    419         // Optimization: Prune out words that are too long compared to how much was typed.
    420         if (depth > mMaxDepth) {
    421             return;
    422         }
    423         final int[] currentChars;
    424         if (codeSize <= inputIndex) {
    425             currentChars = null;
    426         } else {
    427             currentChars = mCodes[inputIndex];
    428         }
    429 
    430         for (int i = 0; i < count; i++) {
    431             final Node node = roots.mData[i];
    432             final char c = node.mCode;
    433             final char lowerC = toLowerCase(c);
    434             final boolean terminal = node.mTerminal;
    435             final NodeArray children = node.mChildren;
    436             final int freq = node.mFrequency;
    437             if (completion || currentChars == null) {
    438                 word[depth] = c;
    439                 if (terminal) {
    440                     final int finalFreq;
    441                     if (skipPos < 0) {
    442                         finalFreq = freq * snr;
    443                     } else {
    444                         finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
    445                     }
    446                     if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, callback)) {
    447                         // No space left in the queue, bail out
    448                         return;
    449                     }
    450                 }
    451                 if (children != null) {
    452                     getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex,
    453                             skipPos, callback);
    454                 }
    455             } else if ((c == Keyboard.CODE_SINGLE_QUOTE
    456                     && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) {
    457                 // Skip the ' and continue deeper
    458                 word[depth] = c;
    459                 if (children != null) {
    460                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    461                             skipPos, callback);
    462                 }
    463             } else {
    464                 // Don't use alternatives if we're looking for missing characters
    465                 final int alternativesSize = skipPos >= 0 ? 1 : currentChars.length;
    466                 for (int j = 0; j < alternativesSize; j++) {
    467                     final int addedAttenuation = (j > 0 ? 1 : 2);
    468                     final int currentChar = currentChars[j];
    469                     if (currentChar == KeyDetector.NOT_A_CODE) {
    470                         break;
    471                     }
    472                     if (currentChar == lowerC || currentChar == c) {
    473                         word[depth] = c;
    474 
    475                         if (codeSize == inputIndex + 1) {
    476                             if (terminal) {
    477                                 final int finalFreq;
    478                                 if (skipPos < 0) {
    479                                     finalFreq = freq * snr * addedAttenuation
    480                                             * FULL_WORD_SCORE_MULTIPLIER;
    481                                 } else {
    482                                     finalFreq = computeSkippedWordFinalFreq(freq,
    483                                             snr * addedAttenuation, mInputLength);
    484                                 }
    485                                 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq,
    486                                         callback)) {
    487                                     // No space left in the queue, bail out
    488                                     return;
    489                                 }
    490                             }
    491                             if (children != null) {
    492                                 getWordsRec(children, codes, word, depth + 1,
    493                                         true, snr * addedAttenuation, inputIndex + 1,
    494                                         skipPos, callback);
    495                             }
    496                         } else if (children != null) {
    497                             getWordsRec(children, codes, word, depth + 1,
    498                                     false, snr * addedAttenuation, inputIndex + 1,
    499                                     skipPos, callback);
    500                         }
    501                     }
    502                 }
    503             }
    504         }
    505     }
    506 
    507     public int setBigramAndGetFrequency(String word1, String word2, int frequency) {
    508         return setBigramAndGetFrequency(word1, word2, frequency, null /* unused */);
    509     }
    510 
    511     public int setBigramAndGetFrequency(String word1, String word2, ForgettingCurveParams fcp) {
    512         return setBigramAndGetFrequency(word1, word2, 0 /* unused */, fcp);
    513     }
    514 
    515     /**
    516      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
    517      * @param frequency frequency for this bigram
    518      * @param addFrequency if true, it adds to current frequency, else it overwrites the old value
    519      * @return returns the final bigram frequency
    520      */
    521     private int setBigramAndGetFrequency(
    522             String word1, String word2, int frequency, ForgettingCurveParams fcp) {
    523         // We don't want results to be different according to case of the looked up left hand side
    524         // word. We do want however to return the correct case for the right hand side.
    525         // So we want to squash the case of the left hand side, and preserve that of the right
    526         // hand side word.
    527         Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    528         Node secondWord = searchWord(mRoots, word2, 0, null);
    529         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    530         if (bigrams == null || bigrams.size() == 0) {
    531             firstWord.mNGrams = new LinkedList<NextWord>();
    532             bigrams = firstWord.mNGrams;
    533         } else {
    534             for (NextWord nw : bigrams) {
    535                 if (nw.getWordNode() == secondWord) {
    536                     return nw.notifyTypedAgainAndGetFrequency();
    537                 }
    538             }
    539         }
    540         if (fcp != null) {
    541             // history
    542             firstWord.mNGrams.add(new NextHistoryWord(secondWord, fcp));
    543         } else {
    544             firstWord.mNGrams.add(new NextStaticWord(secondWord, frequency));
    545         }
    546         return frequency;
    547     }
    548 
    549     /**
    550      * Searches for the word and add the word if it does not exist.
    551      * @return Returns the terminal node of the word we are searching for.
    552      */
    553     private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
    554         final int wordLength = word.length();
    555         final char c = word.charAt(depth);
    556         // Does children have the current character?
    557         final int childrenLength = children.mLength;
    558         Node childNode = null;
    559         for (int i = 0; i < childrenLength; i++) {
    560             final Node node = children.mData[i];
    561             if (node.mCode == c) {
    562                 childNode = node;
    563                 break;
    564             }
    565         }
    566         if (childNode == null) {
    567             childNode = new Node();
    568             childNode.mCode = c;
    569             childNode.mParent = parentNode;
    570             children.add(childNode);
    571         }
    572         if (wordLength == depth + 1) {
    573             // Terminate this word
    574             childNode.mTerminal = true;
    575             return childNode;
    576         }
    577         if (childNode.mChildren == null) {
    578             childNode.mChildren = new NodeArray();
    579         }
    580         return searchWord(childNode.mChildren, word, depth + 1, childNode);
    581     }
    582 
    583     // @VisibleForTesting
    584     boolean reloadDictionaryIfRequired() {
    585         synchronized (mUpdatingLock) {
    586             // If we need to update, start off a background task
    587             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    588             // Currently updating contacts, don't return any results.
    589             return mUpdatingDictionary;
    590         }
    591     }
    592 
    593     private void runBigramReverseLookUp(final CharSequence previousWord,
    594             final WordCallback callback) {
    595         // Search for the lowercase version of the word only, because that's where bigrams
    596         // store their sons.
    597         Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0,
    598                 previousWord.length());
    599         if (prevWord != null && prevWord.mNGrams != null) {
    600             reverseLookUp(prevWord.mNGrams, callback);
    601         }
    602     }
    603 
    604     @Override
    605     public void getBigrams(final WordComposer codes, final CharSequence previousWord,
    606             final WordCallback callback) {
    607         if (!reloadDictionaryIfRequired()) {
    608             runBigramReverseLookUp(previousWord, callback);
    609         }
    610     }
    611 
    612     /**
    613      * Used for testing purposes and in the spell checker
    614      * This function will wait for loading from database to be done
    615      */
    616     void waitForDictionaryLoading() {
    617         while (mUpdatingDictionary) {
    618             try {
    619                 Thread.sleep(100);
    620             } catch (InterruptedException e) {
    621                 //
    622             }
    623         }
    624     }
    625 
    626     protected final void blockingReloadDictionaryIfRequired() {
    627         reloadDictionaryIfRequired();
    628         waitForDictionaryLoading();
    629     }
    630 
    631     // Local to reverseLookUp, but do not allocate each time.
    632     private final char[] mLookedUpString = new char[BinaryDictionary.MAX_WORD_LENGTH];
    633 
    634     /**
    635      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
    636      * through callback.
    637      * @param terminalNodes list of terminal nodes we want to add
    638      */
    639     private void reverseLookUp(LinkedList<NextWord> terminalNodes,
    640             final WordCallback callback) {
    641         Node node;
    642         int freq;
    643         for (NextWord nextWord : terminalNodes) {
    644             node = nextWord.getWordNode();
    645             freq = nextWord.getFrequency();
    646             int index = BinaryDictionary.MAX_WORD_LENGTH;
    647             do {
    648                 --index;
    649                 mLookedUpString[index] = node.mCode;
    650                 node = node.mParent;
    651             } while (node != null);
    652 
    653             if (freq >= 0) {
    654                 callback.addWord(mLookedUpString, index, BinaryDictionary.MAX_WORD_LENGTH - index,
    655                         freq, mDicTypeId, Dictionary.BIGRAM);
    656             }
    657         }
    658     }
    659 
    660     /**
    661      * Recursively search for the terminal node of the word.
    662      *
    663      * One iteration takes the full word to search for and the current index of the recursion.
    664      *
    665      * @param children the node of the trie to search under.
    666      * @param word the word to search for. Only read [offset..length] so there may be trailing chars
    667      * @param offset the index in {@code word} this recursion should operate on.
    668      * @param length the length of the input word.
    669      * @return Returns the terminal node of the word if the word exists
    670      */
    671     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
    672             final int length) {
    673         final int count = children.mLength;
    674         final char currentChar = word.charAt(offset);
    675         for (int j = 0; j < count; j++) {
    676             final Node node = children.mData[j];
    677             if (node.mCode == currentChar) {
    678                 if (offset == length - 1) {
    679                     if (node.mTerminal) {
    680                         return node;
    681                     }
    682                 } else {
    683                     if (node.mChildren != null) {
    684                         Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
    685                         if (returnNode != null) return returnNode;
    686                     }
    687                 }
    688             }
    689         }
    690         return null;
    691     }
    692 
    693     protected void clearDictionary() {
    694         mRoots = new NodeArray();
    695     }
    696 
    697     private class LoadDictionaryTask extends Thread {
    698         LoadDictionaryTask() {}
    699         @Override
    700         public void run() {
    701             loadDictionaryAsync();
    702             synchronized (mUpdatingLock) {
    703                 mUpdatingDictionary = false;
    704             }
    705         }
    706     }
    707 
    708     private static char toLowerCase(char c) {
    709         char baseChar = c;
    710         if (c < BASE_CHARS.length) {
    711             baseChar = BASE_CHARS[c];
    712         }
    713         if (baseChar >= 'A' && baseChar <= 'Z') {
    714             return (char)(baseChar | 32);
    715         } else if (baseChar > 127) {
    716             return Character.toLowerCase(baseChar);
    717         }
    718         return baseChar;
    719     }
    720 
    721     /**
    722      * Table mapping most combined Latin, Greek, and Cyrillic characters
    723      * to their base characters.  If c is in range, BASE_CHARS[c] == c
    724      * if c is not a combined character, or the base character if it
    725      * is combined.
    726      */
    727     private static final char BASE_CHARS[] = {
    728         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    729         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    730         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    731         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    732         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    733         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    734         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    735         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    736         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    737         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    738         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    739         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    740         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    741         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    742         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    743         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    744         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    745         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    746         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    747         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    748         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    749         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    750         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    751         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
    752         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
    753         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
    754         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
    755         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
    756                                                                         // Manually changed df to 73
    757         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    758         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    759         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    760         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
    761         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
    762         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
    763         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
    764         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
    765         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
    766         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
    767         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
    768         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
    769         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
    770         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
    771         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
    772         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
    773         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
    774         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
    775         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
    776         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
    777         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    778         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    779         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    780         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    781         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    782         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
    783         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    784         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    785         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
    786         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
    787         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
    788         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
    789         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
    790         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
    791         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
    792         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
    793         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
    794         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
    795         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
    796         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
    797         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
    798         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
    799         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
    800         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    801         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    802         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    803         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    804         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    805         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    806         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    807         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    808         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    809         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    810         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    811         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    812         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    813         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    814         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    815         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
    816         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    817         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    818         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    819         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    820         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
    821         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
    822         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    823         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    824         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
    825         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    826         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    827         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    828         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    829         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    830         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    831         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    832         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    833         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
    834         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    835         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    836         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    837         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    838         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    839         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
    840         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
    841         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
    842         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
    843         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    844         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    845         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    846         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
    847         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    848         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    849         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    850         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
    851         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
    852         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    853         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    854         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    855         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
    856         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
    857         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
    858         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
    859         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    860         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    861         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    862         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    863         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    864         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    865         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    866         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    867         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    868         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
    869         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    870         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    871         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
    872         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    873         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    874         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    875         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    876         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    877         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    878         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    879         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    880         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    881         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    882         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    883         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
    884         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
    885         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
    886         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
    887         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
    888         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
    889     };
    890 
    891     // generated with:
    892     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
    893 
    894 }
    895