Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.text.TextUtils;
     21 import android.util.Log;
     22 
     23 import com.android.inputmethod.keyboard.ProximityInfo;
     24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     25 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
     26 
     27 import java.util.ArrayList;
     28 import java.util.LinkedList;
     29 
     30 /**
     31  * Base class for an in-memory dictionary that can grow dynamically and can
     32  * be searched for suggestions and valid words.
     33  */
     34 public class ExpandableDictionary extends Dictionary {
     35     private static final String TAG = ExpandableDictionary.class.getSimpleName();
     36     /**
     37      * The weight to give to a word if it's length is the same as the number of typed characters.
     38      */
     39     private static final int FULL_WORD_SCORE_MULTIPLIER = 2;
     40 
     41     // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
     42     protected static final int BIGRAM_MAX_FREQUENCY = 255;
     43 
     44     private Context mContext;
     45     private char[] mWordBuilder = new char[Constants.Dictionary.MAX_WORD_LENGTH];
     46     private int mMaxDepth;
     47     private int mInputLength;
     48 
     49     private boolean mRequiresReload;
     50 
     51     private boolean mUpdatingDictionary;
     52 
     53     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     54     private Object mUpdatingLock = new Object();
     55 
     56     private static final class Node {
     57         Node() {}
     58         char mCode;
     59         int mFrequency;
     60         boolean mTerminal;
     61         Node mParent;
     62         NodeArray mChildren;
     63         ArrayList<char[]> mShortcutTargets;
     64         boolean mShortcutOnly;
     65         LinkedList<NextWord> mNGrams; // Supports ngram
     66     }
     67 
     68     private static final class NodeArray {
     69         Node[] mData;
     70         int mLength = 0;
     71         private static final int INCREMENT = 2;
     72 
     73         NodeArray() {
     74             mData = new Node[INCREMENT];
     75         }
     76 
     77         void add(final Node n) {
     78             if (mLength + 1 > mData.length) {
     79                 Node[] tempData = new Node[mLength + INCREMENT];
     80                 if (mLength > 0) {
     81                     System.arraycopy(mData, 0, tempData, 0, mLength);
     82                 }
     83                 mData = tempData;
     84             }
     85             mData[mLength++] = n;
     86         }
     87     }
     88 
     89     protected interface NextWord {
     90         public Node getWordNode();
     91         public int getFrequency();
     92         public ForgettingCurveParams getFcParams();
     93         public int notifyTypedAgainAndGetFrequency();
     94     }
     95 
     96     private static final class NextStaticWord implements NextWord {
     97         public final Node mWord;
     98         private final int mFrequency;
     99         public NextStaticWord(Node word, int frequency) {
    100             mWord = word;
    101             mFrequency = frequency;
    102         }
    103 
    104         @Override
    105         public Node getWordNode() {
    106             return mWord;
    107         }
    108 
    109         @Override
    110         public int getFrequency() {
    111             return mFrequency;
    112         }
    113 
    114         @Override
    115         public ForgettingCurveParams getFcParams() {
    116             return null;
    117         }
    118 
    119         @Override
    120         public int notifyTypedAgainAndGetFrequency() {
    121             return mFrequency;
    122         }
    123     }
    124 
    125     private static final class NextHistoryWord implements NextWord {
    126         public final Node mWord;
    127         public final ForgettingCurveParams mFcp;
    128 
    129         public NextHistoryWord(Node word, ForgettingCurveParams fcp) {
    130             mWord = word;
    131             mFcp = fcp;
    132         }
    133 
    134         @Override
    135         public Node getWordNode() {
    136             return mWord;
    137         }
    138 
    139         @Override
    140         public int getFrequency() {
    141             return mFcp.getFrequency();
    142         }
    143 
    144         @Override
    145         public ForgettingCurveParams getFcParams() {
    146             return mFcp;
    147         }
    148 
    149         @Override
    150         public int notifyTypedAgainAndGetFrequency() {
    151             return mFcp.notifyTypedAgainAndGetFrequency();
    152         }
    153     }
    154 
    155     private NodeArray mRoots;
    156 
    157     private int[][] mCodes;
    158 
    159     public ExpandableDictionary(final Context context, final String dictType) {
    160         super(dictType);
    161         mContext = context;
    162         clearDictionary();
    163         mCodes = new int[Constants.Dictionary.MAX_WORD_LENGTH][];
    164     }
    165 
    166     public void loadDictionary() {
    167         synchronized (mUpdatingLock) {
    168             startDictionaryLoadingTaskLocked();
    169         }
    170     }
    171 
    172     public void startDictionaryLoadingTaskLocked() {
    173         if (!mUpdatingDictionary) {
    174             mUpdatingDictionary = true;
    175             mRequiresReload = false;
    176             new LoadDictionaryTask().start();
    177         }
    178     }
    179 
    180     public void setRequiresReload(final boolean reload) {
    181         synchronized (mUpdatingLock) {
    182             mRequiresReload = reload;
    183         }
    184     }
    185 
    186     public boolean getRequiresReload() {
    187         return mRequiresReload;
    188     }
    189 
    190     /** Override to load your dictionary here, on a background thread. */
    191     public void loadDictionaryAsync() {
    192         // empty base implementation
    193     }
    194 
    195     public Context getContext() {
    196         return mContext;
    197     }
    198 
    199     public int getMaxWordLength() {
    200         return Constants.Dictionary.MAX_WORD_LENGTH;
    201     }
    202 
    203     public void addWord(final String word, final String shortcutTarget, final int frequency) {
    204         if (word.length() >= Constants.Dictionary.MAX_WORD_LENGTH) {
    205             return;
    206         }
    207         addWordRec(mRoots, word, 0, shortcutTarget, frequency, null);
    208     }
    209 
    210     private void addWordRec(final NodeArray children, final String word, final int depth,
    211             final String shortcutTarget, final int frequency, final Node parentNode) {
    212         final int wordLength = word.length();
    213         if (wordLength <= depth) return;
    214         final char c = word.charAt(depth);
    215         // Does children have the current character?
    216         final int childrenLength = children.mLength;
    217         Node childNode = null;
    218         for (int i = 0; i < childrenLength; i++) {
    219             final Node node = children.mData[i];
    220             if (node.mCode == c) {
    221                 childNode = node;
    222                 break;
    223             }
    224         }
    225         final boolean isShortcutOnly = (null != shortcutTarget);
    226         if (childNode == null) {
    227             childNode = new Node();
    228             childNode.mCode = c;
    229             childNode.mParent = parentNode;
    230             childNode.mShortcutOnly = isShortcutOnly;
    231             children.add(childNode);
    232         }
    233         if (wordLength == depth + 1 && shortcutTarget != null) {
    234             // Terminate this word
    235             childNode.mTerminal = true;
    236             if (isShortcutOnly) {
    237                 if (null == childNode.mShortcutTargets) {
    238                     childNode.mShortcutTargets = CollectionUtils.newArrayList();
    239                 }
    240                 childNode.mShortcutTargets.add(shortcutTarget.toCharArray());
    241             } else {
    242                 childNode.mShortcutOnly = false;
    243             }
    244             childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
    245             if (childNode.mFrequency > 255) childNode.mFrequency = 255;
    246             return;
    247         }
    248         if (childNode.mChildren == null) {
    249             childNode.mChildren = new NodeArray();
    250         }
    251         addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode);
    252     }
    253 
    254     @Override
    255     public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
    256             final String prevWord, final ProximityInfo proximityInfo,
    257             final boolean blockOffensiveWords) {
    258         if (reloadDictionaryIfRequired()) return null;
    259         if (composer.size() > 1) {
    260             if (composer.size() >= Constants.Dictionary.MAX_WORD_LENGTH) {
    261                 return null;
    262             }
    263             final ArrayList<SuggestedWordInfo> suggestions =
    264                     getWordsInner(composer, prevWord, proximityInfo);
    265             return suggestions;
    266         } else {
    267             if (TextUtils.isEmpty(prevWord)) return null;
    268             final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
    269             runBigramReverseLookUp(prevWord, suggestions);
    270             return suggestions;
    271         }
    272     }
    273 
    274     // This reloads the dictionary if required, and returns whether it's currently updating its
    275     // contents or not.
    276     private boolean reloadDictionaryIfRequired() {
    277         synchronized (mUpdatingLock) {
    278             // If we need to update, start off a background task
    279             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    280             return mUpdatingDictionary;
    281         }
    282     }
    283 
    284     protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer codes,
    285             final String prevWordForBigrams, final ProximityInfo proximityInfo) {
    286         final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
    287         mInputLength = codes.size();
    288         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
    289         final InputPointers ips = codes.getInputPointers();
    290         final int[] xCoordinates = ips.getXCoordinates();
    291         final int[] yCoordinates = ips.getYCoordinates();
    292         // Cache the codes so that we don't have to lookup an array list
    293         for (int i = 0; i < mInputLength; i++) {
    294             // TODO: Calculate proximity info here.
    295             if (mCodes[i] == null || mCodes[i].length < 1) {
    296                 mCodes[i] = new int[ProximityInfo.MAX_PROXIMITY_CHARS_SIZE];
    297             }
    298             final int x = xCoordinates != null && i < xCoordinates.length ?
    299                     xCoordinates[i] : Constants.NOT_A_COORDINATE;
    300             final int y = xCoordinates != null && i < yCoordinates.length ?
    301                     yCoordinates[i] : Constants.NOT_A_COORDINATE;
    302             proximityInfo.fillArrayWithNearestKeyCodes(x, y, codes.getCodeAt(i), mCodes[i]);
    303         }
    304         mMaxDepth = mInputLength * 3;
    305         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, suggestions);
    306         for (int i = 0; i < mInputLength; i++) {
    307             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, suggestions);
    308         }
    309         return suggestions;
    310     }
    311 
    312     @Override
    313     public synchronized boolean isValidWord(final String word) {
    314         synchronized (mUpdatingLock) {
    315             // If we need to update, start off a background task
    316             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    317             if (mUpdatingDictionary) return false;
    318         }
    319         final Node node = searchNode(mRoots, word, 0, word.length());
    320         // If node is null, we didn't find the word, so it's not valid.
    321         // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word,
    322         // so that means it's not a valid word.
    323         // If node.mShortcutOnly is false, then it exists as a word (it may also exist as
    324         // a shortcut, but this does not matter), so it's a valid word.
    325         return (node == null) ? false : !node.mShortcutOnly;
    326     }
    327 
    328     protected boolean removeBigram(final String word1, final String word2) {
    329         // Refer to addOrSetBigram() about word1.toLowerCase()
    330         final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    331         final Node secondWord = searchWord(mRoots, word2, 0, null);
    332         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    333         NextWord bigramNode = null;
    334         if (bigrams == null || bigrams.size() == 0) {
    335             return false;
    336         } else {
    337             for (NextWord nw : bigrams) {
    338                 if (nw.getWordNode() == secondWord) {
    339                     bigramNode = nw;
    340                     break;
    341                 }
    342             }
    343         }
    344         if (bigramNode == null) {
    345             return false;
    346         }
    347         return bigrams.remove(bigramNode);
    348     }
    349 
    350     /**
    351      * Returns the word's frequency or -1 if not found
    352      */
    353     protected int getWordFrequency(final String word) {
    354         // Case-sensitive search
    355         final Node node = searchNode(mRoots, word, 0, word.length());
    356         return (node == null) ? -1 : node.mFrequency;
    357     }
    358 
    359     protected NextWord getBigramWord(final String word1, final String word2) {
    360         // Refer to addOrSetBigram() about word1.toLowerCase()
    361         final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    362         final Node secondWord = searchWord(mRoots, word2, 0, null);
    363         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    364         if (bigrams == null || bigrams.size() == 0) {
    365             return null;
    366         } else {
    367             for (NextWord nw : bigrams) {
    368                 if (nw.getWordNode() == secondWord) {
    369                     return nw;
    370                 }
    371             }
    372         }
    373         return null;
    374     }
    375 
    376     private static int computeSkippedWordFinalFreq(final int freq, final int snr,
    377             final int inputLength) {
    378         // The computation itself makes sense for >= 2, but the == 2 case returns 0
    379         // anyway so we may as well test against 3 instead and return the constant
    380         if (inputLength >= 3) {
    381             return (freq * snr * (inputLength - 2)) / (inputLength - 1);
    382         } else {
    383             return 0;
    384         }
    385     }
    386 
    387     /**
    388      * Helper method to add a word and its shortcuts.
    389      *
    390      * @param node the terminal node
    391      * @param word the word to insert, as an array of code points
    392      * @param depth the depth of the node in the tree
    393      * @param finalFreq the frequency for this word
    394      * @param suggestions the suggestion collection to add the suggestions to
    395      * @return whether there is still space for more words.
    396      */
    397     private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth,
    398             final int finalFreq, final ArrayList<SuggestedWordInfo> suggestions) {
    399         if (finalFreq > 0 && !node.mShortcutOnly) {
    400             // Use KIND_CORRECTION always. This dictionary does not really have a notion of
    401             // COMPLETION against CORRECTION; we could artificially add one by looking at
    402             // the respective size of the typed word and the suggestion if it matters sometime
    403             // in the future.
    404             suggestions.add(new SuggestedWordInfo(new String(word, 0, depth + 1), finalFreq,
    405                     SuggestedWordInfo.KIND_CORRECTION, mDictType));
    406             if (suggestions.size() >= Suggest.MAX_SUGGESTIONS) return false;
    407         }
    408         if (null != node.mShortcutTargets) {
    409             final int length = node.mShortcutTargets.size();
    410             for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) {
    411                 final char[] shortcut = node.mShortcutTargets.get(shortcutIndex);
    412                 suggestions.add(new SuggestedWordInfo(new String(shortcut, 0, shortcut.length),
    413                         finalFreq, SuggestedWordInfo.KIND_SHORTCUT, mDictType));
    414                 if (suggestions.size() > Suggest.MAX_SUGGESTIONS) return false;
    415             }
    416         }
    417         return true;
    418     }
    419 
    420     /**
    421      * Recursively traverse the tree for words that match the input. Input consists of
    422      * a list of arrays. Each item in the list is one input character position. An input
    423      * character is actually an array of multiple possible candidates. This function is not
    424      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
    425      * size.
    426      * @param roots node whose children have to be search for matches
    427      * @param codes the input character codes
    428      * @param word the word being composed as a possible match
    429      * @param depth the depth of traversal - the length of the word being composed thus far
    430      * @param completion whether the traversal is now in completion mode - meaning that we've
    431      * exhausted the input and we're looking for all possible suffixes.
    432      * @param snr current weight of the word being formed
    433      * @param inputIndex position in the input characters. This can be off from the depth in
    434      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
    435      * "wouldve", it could be matching "would've", so the depth will be one more than the
    436      * inputIndex
    437      * @param suggestions the list in which to add suggestions
    438      */
    439     // TODO: Share this routine with the native code for BinaryDictionary
    440     protected void getWordsRec(final NodeArray roots, final WordComposer codes, final char[] word,
    441             final int depth, final boolean completion, final int snr, final int inputIndex,
    442             final int skipPos, final ArrayList<SuggestedWordInfo> suggestions) {
    443         final int count = roots.mLength;
    444         final int codeSize = mInputLength;
    445         // Optimization: Prune out words that are too long compared to how much was typed.
    446         if (depth > mMaxDepth) {
    447             return;
    448         }
    449         final int[] currentChars;
    450         if (codeSize <= inputIndex) {
    451             currentChars = null;
    452         } else {
    453             currentChars = mCodes[inputIndex];
    454         }
    455 
    456         for (int i = 0; i < count; i++) {
    457             final Node node = roots.mData[i];
    458             final char c = node.mCode;
    459             final char lowerC = toLowerCase(c);
    460             final boolean terminal = node.mTerminal;
    461             final NodeArray children = node.mChildren;
    462             final int freq = node.mFrequency;
    463             if (completion || currentChars == null) {
    464                 word[depth] = c;
    465                 if (terminal) {
    466                     final int finalFreq;
    467                     if (skipPos < 0) {
    468                         finalFreq = freq * snr;
    469                     } else {
    470                         finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
    471                     }
    472                     if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, suggestions)) {
    473                         // No space left in the queue, bail out
    474                         return;
    475                     }
    476                 }
    477                 if (children != null) {
    478                     getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex,
    479                             skipPos, suggestions);
    480                 }
    481             } else if ((c == Constants.CODE_SINGLE_QUOTE
    482                     && currentChars[0] != Constants.CODE_SINGLE_QUOTE) || depth == skipPos) {
    483                 // Skip the ' and continue deeper
    484                 word[depth] = c;
    485                 if (children != null) {
    486                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    487                             skipPos, suggestions);
    488                 }
    489             } else {
    490                 // Don't use alternatives if we're looking for missing characters
    491                 final int alternativesSize = skipPos >= 0 ? 1 : currentChars.length;
    492                 for (int j = 0; j < alternativesSize; j++) {
    493                     final int addedAttenuation = (j > 0 ? 1 : 2);
    494                     final int currentChar = currentChars[j];
    495                     if (currentChar == Constants.NOT_A_CODE) {
    496                         break;
    497                     }
    498                     if (currentChar == lowerC || currentChar == c) {
    499                         word[depth] = c;
    500 
    501                         if (codeSize == inputIndex + 1) {
    502                             if (terminal) {
    503                                 final int finalFreq;
    504                                 if (skipPos < 0) {
    505                                     finalFreq = freq * snr * addedAttenuation
    506                                             * FULL_WORD_SCORE_MULTIPLIER;
    507                                 } else {
    508                                     finalFreq = computeSkippedWordFinalFreq(freq,
    509                                             snr * addedAttenuation, mInputLength);
    510                                 }
    511                                 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq,
    512                                         suggestions)) {
    513                                     // No space left in the queue, bail out
    514                                     return;
    515                                 }
    516                             }
    517                             if (children != null) {
    518                                 getWordsRec(children, codes, word, depth + 1,
    519                                         true, snr * addedAttenuation, inputIndex + 1,
    520                                         skipPos, suggestions);
    521                             }
    522                         } else if (children != null) {
    523                             getWordsRec(children, codes, word, depth + 1,
    524                                     false, snr * addedAttenuation, inputIndex + 1,
    525                                     skipPos, suggestions);
    526                         }
    527                     }
    528                 }
    529             }
    530         }
    531     }
    532 
    533     public int setBigramAndGetFrequency(final String word1, final String word2,
    534             final int frequency) {
    535         return setBigramAndGetFrequency(word1, word2, frequency, null /* unused */);
    536     }
    537 
    538     public int setBigramAndGetFrequency(final String word1, final String word2,
    539             final ForgettingCurveParams fcp) {
    540         return setBigramAndGetFrequency(word1, word2, 0 /* unused */, fcp);
    541     }
    542 
    543     /**
    544      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
    545      * @param word1 the first word of this bigram
    546      * @param word2 the second word of this bigram
    547      * @param frequency frequency for this bigram
    548      * @param fcp an instance of ForgettingCurveParams to use for decay policy
    549      * @return returns the final bigram frequency
    550      */
    551     private int setBigramAndGetFrequency(final String word1, final String word2,
    552             final int frequency, final ForgettingCurveParams fcp) {
    553         // We don't want results to be different according to case of the looked up left hand side
    554         // word. We do want however to return the correct case for the right hand side.
    555         // So we want to squash the case of the left hand side, and preserve that of the right
    556         // hand side word.
    557         final String word1Lower = word1.toLowerCase();
    558         if (TextUtils.isEmpty(word1Lower) || TextUtils.isEmpty(word2)) {
    559             Log.e(TAG, "Invalid bigram pair: " + word1 + ", " + word1Lower + ", " + word2);
    560             return frequency;
    561         }
    562         final Node firstWord = searchWord(mRoots, word1Lower, 0, null);
    563         final Node secondWord = searchWord(mRoots, word2, 0, null);
    564         LinkedList<NextWord> bigrams = firstWord.mNGrams;
    565         if (bigrams == null || bigrams.size() == 0) {
    566             firstWord.mNGrams = CollectionUtils.newLinkedList();
    567             bigrams = firstWord.mNGrams;
    568         } else {
    569             for (NextWord nw : bigrams) {
    570                 if (nw.getWordNode() == secondWord) {
    571                     return nw.notifyTypedAgainAndGetFrequency();
    572                 }
    573             }
    574         }
    575         if (fcp != null) {
    576             // history
    577             firstWord.mNGrams.add(new NextHistoryWord(secondWord, fcp));
    578         } else {
    579             firstWord.mNGrams.add(new NextStaticWord(secondWord, frequency));
    580         }
    581         return frequency;
    582     }
    583 
    584     /**
    585      * Searches for the word and add the word if it does not exist.
    586      * @return Returns the terminal node of the word we are searching for.
    587      */
    588     private Node searchWord(final NodeArray children, final String word, final int depth,
    589             final Node parentNode) {
    590         final int wordLength = word.length();
    591         final char c = word.charAt(depth);
    592         // Does children have the current character?
    593         final int childrenLength = children.mLength;
    594         Node childNode = null;
    595         for (int i = 0; i < childrenLength; i++) {
    596             final Node node = children.mData[i];
    597             if (node.mCode == c) {
    598                 childNode = node;
    599                 break;
    600             }
    601         }
    602         if (childNode == null) {
    603             childNode = new Node();
    604             childNode.mCode = c;
    605             childNode.mParent = parentNode;
    606             children.add(childNode);
    607         }
    608         if (wordLength == depth + 1) {
    609             // Terminate this word
    610             childNode.mTerminal = true;
    611             return childNode;
    612         }
    613         if (childNode.mChildren == null) {
    614             childNode.mChildren = new NodeArray();
    615         }
    616         return searchWord(childNode.mChildren, word, depth + 1, childNode);
    617     }
    618 
    619     private void runBigramReverseLookUp(final String previousWord,
    620             final ArrayList<SuggestedWordInfo> suggestions) {
    621         // Search for the lowercase version of the word only, because that's where bigrams
    622         // store their sons.
    623         final Node prevWord = searchNode(mRoots, previousWord.toLowerCase(), 0,
    624                 previousWord.length());
    625         if (prevWord != null && prevWord.mNGrams != null) {
    626             reverseLookUp(prevWord.mNGrams, suggestions);
    627         }
    628     }
    629 
    630     // Local to reverseLookUp, but do not allocate each time.
    631     private final char[] mLookedUpString = new char[Constants.Dictionary.MAX_WORD_LENGTH];
    632 
    633     /**
    634      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
    635      * to the suggestions list passed as an argument.
    636      * @param terminalNodes list of terminal nodes we want to add
    637      * @param suggestions the suggestion collection to add the word to
    638      */
    639     private void reverseLookUp(final LinkedList<NextWord> terminalNodes,
    640             final ArrayList<SuggestedWordInfo> suggestions) {
    641         Node node;
    642         int freq;
    643         for (NextWord nextWord : terminalNodes) {
    644             node = nextWord.getWordNode();
    645             freq = nextWord.getFrequency();
    646             int index = Constants.Dictionary.MAX_WORD_LENGTH;
    647             do {
    648                 --index;
    649                 mLookedUpString[index] = node.mCode;
    650                 node = node.mParent;
    651             } while (node != null && index > 0);
    652 
    653             // If node is null, we have a word longer than MAX_WORD_LENGTH in the dictionary.
    654             // It's a little unclear how this can happen, but just in case it does it's safer
    655             // to ignore the word in this case.
    656             if (freq >= 0 && node == null) {
    657                 suggestions.add(new SuggestedWordInfo(new String(mLookedUpString, index,
    658                         Constants.Dictionary.MAX_WORD_LENGTH - index),
    659                         freq, SuggestedWordInfo.KIND_CORRECTION, mDictType));
    660             }
    661         }
    662     }
    663 
    664     /**
    665      * Recursively search for the terminal node of the word.
    666      *
    667      * One iteration takes the full word to search for and the current index of the recursion.
    668      *
    669      * @param children the node of the trie to search under.
    670      * @param word the word to search for. Only read [offset..length] so there may be trailing chars
    671      * @param offset the index in {@code word} this recursion should operate on.
    672      * @param length the length of the input word.
    673      * @return Returns the terminal node of the word if the word exists
    674      */
    675     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
    676             final int length) {
    677         final int count = children.mLength;
    678         final char currentChar = word.charAt(offset);
    679         for (int j = 0; j < count; j++) {
    680             final Node node = children.mData[j];
    681             if (node.mCode == currentChar) {
    682                 if (offset == length - 1) {
    683                     if (node.mTerminal) {
    684                         return node;
    685                     }
    686                 } else {
    687                     if (node.mChildren != null) {
    688                         Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
    689                         if (returnNode != null) return returnNode;
    690                     }
    691                 }
    692             }
    693         }
    694         return null;
    695     }
    696 
    697     protected void clearDictionary() {
    698         mRoots = new NodeArray();
    699     }
    700 
    701     private final class LoadDictionaryTask extends Thread {
    702         LoadDictionaryTask() {}
    703         @Override
    704         public void run() {
    705             loadDictionaryAsync();
    706             synchronized (mUpdatingLock) {
    707                 mUpdatingDictionary = false;
    708             }
    709         }
    710     }
    711 
    712     private static char toLowerCase(final char c) {
    713         char baseChar = c;
    714         if (c < BASE_CHARS.length) {
    715             baseChar = BASE_CHARS[c];
    716         }
    717         if (baseChar >= 'A' && baseChar <= 'Z') {
    718             return (char)(baseChar | 32);
    719         } else if (baseChar > 127) {
    720             return Character.toLowerCase(baseChar);
    721         }
    722         return baseChar;
    723     }
    724 
    725     /**
    726      * Table mapping most combined Latin, Greek, and Cyrillic characters
    727      * to their base characters.  If c is in range, BASE_CHARS[c] == c
    728      * if c is not a combined character, or the base character if it
    729      * is combined.
    730      */
    731     private static final char BASE_CHARS[] = {
    732         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    733         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    734         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    735         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    736         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    737         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    738         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    739         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    740         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    741         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    742         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    743         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    744         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    745         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    746         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    747         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    748         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    749         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    750         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    751         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    752         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    753         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    754         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    755         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
    756         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
    757         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
    758         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
    759         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
    760                                                                         // Manually changed df to 73
    761         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    762         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    763         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    764         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
    765         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
    766         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
    767         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
    768         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
    769         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
    770         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
    771         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
    772         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
    773         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
    774         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
    775         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
    776         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
    777         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
    778         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
    779         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
    780         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
    781         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    782         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    783         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    784         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    785         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    786         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
    787         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    788         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    789         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
    790         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
    791         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
    792         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
    793         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
    794         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
    795         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
    796         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
    797         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
    798         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
    799         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
    800         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
    801         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
    802         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
    803         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
    804         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    805         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    806         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    807         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    808         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    809         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    810         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    811         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    812         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    813         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    814         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    815         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    816         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    817         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    818         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    819         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
    820         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    821         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    822         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    823         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    824         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
    825         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
    826         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    827         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    828         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
    829         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    830         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    831         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    832         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    833         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    834         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    835         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    836         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    837         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
    838         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    839         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    840         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    841         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    842         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    843         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
    844         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
    845         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
    846         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
    847         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    848         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    849         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    850         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
    851         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    852         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    853         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    854         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
    855         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
    856         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    857         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    858         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    859         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
    860         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
    861         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
    862         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
    863         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    864         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    865         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    866         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    867         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    868         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    869         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    870         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    871         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    872         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
    873         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    874         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    875         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
    876         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    877         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    878         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    879         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    880         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    881         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    882         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    883         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    884         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    885         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    886         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    887         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
    888         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
    889         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
    890         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
    891         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
    892         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
    893     };
    894 
    895     // generated with:
    896     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
    897 
    898 }
    899