Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 
     21 import com.android.inputmethod.keyboard.Keyboard;
     22 import com.android.inputmethod.keyboard.ProximityInfo;
     23 
     24 import java.util.LinkedList;
     25 
     26 /**
     27  * Base class for an in-memory dictionary that can grow dynamically and can
     28  * be searched for suggestions and valid words.
     29  */
     30 public class ExpandableDictionary extends Dictionary {
     31     /**
     32      * There is difference between what java and native code can handle.
     33      * It uses 32 because Java stack overflows when greater value is used.
     34      */
     35     protected static final int MAX_WORD_LENGTH = 32;
     36 
     37     // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
     38     protected static final int BIGRAM_MAX_FREQUENCY = 255;
     39 
     40     private Context mContext;
     41     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
     42     private int mDicTypeId;
     43     private int mMaxDepth;
     44     private int mInputLength;
     45 
     46     private boolean mRequiresReload;
     47 
     48     private boolean mUpdatingDictionary;
     49 
     50     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     51     private Object mUpdatingLock = new Object();
     52 
     53     private static class Node {
     54         char mCode;
     55         int mFrequency;
     56         boolean mTerminal;
     57         Node mParent;
     58         NodeArray mChildren;
     59         LinkedList<NextWord> mNGrams; // Supports ngram
     60     }
     61 
     62     private static class NodeArray {
     63         Node[] mData;
     64         int mLength = 0;
     65         private static final int INCREMENT = 2;
     66 
     67         NodeArray() {
     68             mData = new Node[INCREMENT];
     69         }
     70 
     71         void add(Node n) {
     72             if (mLength + 1 > mData.length) {
     73                 Node[] tempData = new Node[mLength + INCREMENT];
     74                 if (mLength > 0) {
     75                     System.arraycopy(mData, 0, tempData, 0, mLength);
     76                 }
     77                 mData = tempData;
     78             }
     79             mData[mLength++] = n;
     80         }
     81     }
     82 
     83     private static class NextWord {
     84         public final Node mWord;
     85         private int mFrequency;
     86 
     87         public NextWord(Node word, int frequency) {
     88             mWord = word;
     89             mFrequency = frequency;
     90         }
     91 
     92         public int getFrequency() {
     93             return mFrequency;
     94         }
     95 
     96         public int setFrequency(int freq) {
     97             mFrequency = freq;
     98             return mFrequency;
     99         }
    100 
    101         public int addFrequency(int add) {
    102             mFrequency += add;
    103             if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY;
    104             return mFrequency;
    105         }
    106     }
    107 
    108     private NodeArray mRoots;
    109 
    110     private int[][] mCodes;
    111 
    112     public ExpandableDictionary(Context context, int dicTypeId) {
    113         mContext = context;
    114         clearDictionary();
    115         mCodes = new int[MAX_WORD_LENGTH][];
    116         mDicTypeId = dicTypeId;
    117     }
    118 
    119     public void loadDictionary() {
    120         synchronized (mUpdatingLock) {
    121             startDictionaryLoadingTaskLocked();
    122         }
    123     }
    124 
    125     public void startDictionaryLoadingTaskLocked() {
    126         if (!mUpdatingDictionary) {
    127             mUpdatingDictionary = true;
    128             mRequiresReload = false;
    129             new LoadDictionaryTask().start();
    130         }
    131     }
    132 
    133     public void setRequiresReload(boolean reload) {
    134         synchronized (mUpdatingLock) {
    135             mRequiresReload = reload;
    136         }
    137     }
    138 
    139     public boolean getRequiresReload() {
    140         return mRequiresReload;
    141     }
    142 
    143     /** Override to load your dictionary here, on a background thread. */
    144     public void loadDictionaryAsync() {
    145         // empty base implementation
    146     }
    147 
    148     public Context getContext() {
    149         return mContext;
    150     }
    151 
    152     public int getMaxWordLength() {
    153         return MAX_WORD_LENGTH;
    154     }
    155 
    156     public void addWord(String word, int frequency) {
    157         addWordRec(mRoots, word, 0, frequency, null);
    158     }
    159 
    160     private void addWordRec(NodeArray children, final String word, final int depth,
    161             final int frequency, Node parentNode) {
    162         final int wordLength = word.length();
    163         if (wordLength <= depth) return;
    164         final char c = word.charAt(depth);
    165         // Does children have the current character?
    166         final int childrenLength = children.mLength;
    167         Node childNode = null;
    168         for (int i = 0; i < childrenLength; i++) {
    169             final Node node = children.mData[i];
    170             if (node.mCode == c) {
    171                 childNode = node;
    172                 break;
    173             }
    174         }
    175         if (childNode == null) {
    176             childNode = new Node();
    177             childNode.mCode = c;
    178             childNode.mParent = parentNode;
    179             children.add(childNode);
    180         }
    181         if (wordLength == depth + 1) {
    182             // Terminate this word
    183             childNode.mTerminal = true;
    184             childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
    185             if (childNode.mFrequency > 255) childNode.mFrequency = 255;
    186             return;
    187         }
    188         if (childNode.mChildren == null) {
    189             childNode.mChildren = new NodeArray();
    190         }
    191         addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode);
    192     }
    193 
    194     @Override
    195     public void getWords(final WordComposer codes, final WordCallback callback,
    196             final ProximityInfo proximityInfo) {
    197         synchronized (mUpdatingLock) {
    198             // If we need to update, start off a background task
    199             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    200             // Currently updating contacts, don't return any results.
    201             if (mUpdatingDictionary) return;
    202         }
    203         getWordsInner(codes, callback, proximityInfo);
    204     }
    205 
    206     protected final void getWordsInner(final WordComposer codes, final WordCallback callback,
    207             @SuppressWarnings("unused") final ProximityInfo proximityInfo) {
    208         mInputLength = codes.size();
    209         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
    210         // Cache the codes so that we don't have to lookup an array list
    211         for (int i = 0; i < mInputLength; i++) {
    212             mCodes[i] = codes.getCodesAt(i);
    213         }
    214         mMaxDepth = mInputLength * 3;
    215         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
    216         for (int i = 0; i < mInputLength; i++) {
    217             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
    218         }
    219     }
    220 
    221     @Override
    222     public synchronized boolean isValidWord(CharSequence word) {
    223         synchronized (mUpdatingLock) {
    224             // If we need to update, start off a background task
    225             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    226             if (mUpdatingDictionary) return false;
    227         }
    228         return getWordFrequency(word) > -1;
    229     }
    230 
    231     /**
    232      * Returns the word's frequency or -1 if not found
    233      */
    234     protected int getWordFrequency(CharSequence word) {
    235         // Case-sensitive search
    236         Node node = searchNode(mRoots, word, 0, word.length());
    237         return (node == null) ? -1 : node.mFrequency;
    238     }
    239 
    240     private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
    241         // The computation itself makes sense for >= 2, but the == 2 case returns 0
    242         // anyway so we may as well test against 3 instead and return the constant
    243         if (inputLength >= 3) {
    244             return (freq * snr * (inputLength - 2)) / (inputLength - 1);
    245         } else {
    246             return 0;
    247         }
    248     }
    249 
    250     /**
    251      * Recursively traverse the tree for words that match the input. Input consists of
    252      * a list of arrays. Each item in the list is one input character position. An input
    253      * character is actually an array of multiple possible candidates. This function is not
    254      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
    255      * size.
    256      * @param roots node whose children have to be search for matches
    257      * @param codes the input character codes
    258      * @param word the word being composed as a possible match
    259      * @param depth the depth of traversal - the length of the word being composed thus far
    260      * @param completion whether the traversal is now in completion mode - meaning that we've
    261      * exhausted the input and we're looking for all possible suffixes.
    262      * @param snr current weight of the word being formed
    263      * @param inputIndex position in the input characters. This can be off from the depth in
    264      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
    265      * "wouldve", it could be matching "would've", so the depth will be one more than the
    266      * inputIndex
    267      * @param callback the callback class for adding a word
    268      */
    269     // TODO: Share this routine with the native code for BinaryDictionary
    270     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
    271             final int depth, final boolean completion, int snr, int inputIndex, int skipPos,
    272             WordCallback callback) {
    273         final int count = roots.mLength;
    274         final int codeSize = mInputLength;
    275         // Optimization: Prune out words that are too long compared to how much was typed.
    276         if (depth > mMaxDepth) {
    277             return;
    278         }
    279         final int[] currentChars;
    280         if (codeSize <= inputIndex) {
    281             currentChars = null;
    282         } else {
    283             currentChars = mCodes[inputIndex];
    284         }
    285 
    286         for (int i = 0; i < count; i++) {
    287             final Node node = roots.mData[i];
    288             final char c = node.mCode;
    289             final char lowerC = toLowerCase(c);
    290             final boolean terminal = node.mTerminal;
    291             final NodeArray children = node.mChildren;
    292             final int freq = node.mFrequency;
    293             if (completion || currentChars == null) {
    294                 word[depth] = c;
    295                 if (terminal) {
    296                     final int finalFreq;
    297                     if (skipPos < 0) {
    298                         finalFreq = freq * snr;
    299                     } else {
    300                         finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
    301                     }
    302                     if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
    303                             DataType.UNIGRAM)) {
    304                         return;
    305                     }
    306                 }
    307                 if (children != null) {
    308                     getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex,
    309                             skipPos, callback);
    310                 }
    311             } else if ((c == Keyboard.CODE_SINGLE_QUOTE
    312                     && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) {
    313                 // Skip the ' and continue deeper
    314                 word[depth] = c;
    315                 if (children != null) {
    316                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    317                             skipPos, callback);
    318                 }
    319             } else {
    320                 // Don't use alternatives if we're looking for missing characters
    321                 final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
    322                 for (int j = 0; j < alternativesSize; j++) {
    323                     final int addedAttenuation = (j > 0 ? 1 : 2);
    324                     final int currentChar = currentChars[j];
    325                     if (currentChar == -1) {
    326                         break;
    327                     }
    328                     if (currentChar == lowerC || currentChar == c) {
    329                         word[depth] = c;
    330 
    331                         if (codeSize == inputIndex + 1) {
    332                             if (terminal) {
    333                                 if (INCLUDE_TYPED_WORD_IF_VALID
    334                                         || !same(word, depth + 1, codes.getTypedWord())) {
    335                                     final int finalFreq;
    336                                     if (skipPos < 0) {
    337                                         finalFreq = freq * snr * addedAttenuation
    338                                                 * FULL_WORD_SCORE_MULTIPLIER;
    339                                     } else {
    340                                         finalFreq = computeSkippedWordFinalFreq(freq,
    341                                                 snr * addedAttenuation, mInputLength);
    342                                     }
    343                                     callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
    344                                             DataType.UNIGRAM);
    345                                 }
    346                             }
    347                             if (children != null) {
    348                                 getWordsRec(children, codes, word, depth + 1,
    349                                         true, snr * addedAttenuation, inputIndex + 1,
    350                                         skipPos, callback);
    351                             }
    352                         } else if (children != null) {
    353                             getWordsRec(children, codes, word, depth + 1,
    354                                     false, snr * addedAttenuation, inputIndex + 1,
    355                                     skipPos, callback);
    356                         }
    357                     }
    358                 }
    359             }
    360         }
    361     }
    362 
    363     protected int setBigram(String word1, String word2, int frequency) {
    364         return addOrSetBigram(word1, word2, frequency, false);
    365     }
    366 
    367     protected int addBigram(String word1, String word2, int frequency) {
    368         return addOrSetBigram(word1, word2, frequency, true);
    369     }
    370 
    371     /**
    372      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
    373      * @param frequency frequency for this bigram
    374      * @param addFrequency if true, it adds to current frequency, else it overwrites the old value
    375      * @return returns the final frequency
    376      */
    377     private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
    378         // We don't want results to be different according to case of the looked up left hand side
    379         // word. We do want however to return the correct case for the right hand side.
    380         // So we want to squash the case of the left hand side, and preserve that of the right
    381         // hand side word.
    382         Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
    383         Node secondWord = searchWord(mRoots, word2, 0, null);
    384         LinkedList<NextWord> bigram = firstWord.mNGrams;
    385         if (bigram == null || bigram.size() == 0) {
    386             firstWord.mNGrams = new LinkedList<NextWord>();
    387             bigram = firstWord.mNGrams;
    388         } else {
    389             for (NextWord nw : bigram) {
    390                 if (nw.mWord == secondWord) {
    391                     if (addFrequency) {
    392                         return nw.addFrequency(frequency);
    393                     } else {
    394                         return nw.setFrequency(frequency);
    395                     }
    396                 }
    397             }
    398         }
    399         firstWord.mNGrams.add(new NextWord(secondWord, frequency));
    400         return frequency;
    401     }
    402 
    403     /**
    404      * Searches for the word and add the word if it does not exist.
    405      * @return Returns the terminal node of the word we are searching for.
    406      */
    407     private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
    408         final int wordLength = word.length();
    409         final char c = word.charAt(depth);
    410         // Does children have the current character?
    411         final int childrenLength = children.mLength;
    412         Node childNode = null;
    413         for (int i = 0; i < childrenLength; i++) {
    414             final Node node = children.mData[i];
    415             if (node.mCode == c) {
    416                 childNode = node;
    417                 break;
    418             }
    419         }
    420         if (childNode == null) {
    421             childNode = new Node();
    422             childNode.mCode = c;
    423             childNode.mParent = parentNode;
    424             children.add(childNode);
    425         }
    426         if (wordLength == depth + 1) {
    427             // Terminate this word
    428             childNode.mTerminal = true;
    429             return childNode;
    430         }
    431         if (childNode.mChildren == null) {
    432             childNode.mChildren = new NodeArray();
    433         }
    434         return searchWord(childNode.mChildren, word, depth + 1, childNode);
    435     }
    436 
    437     // @VisibleForTesting
    438     boolean reloadDictionaryIfRequired() {
    439         synchronized (mUpdatingLock) {
    440             // If we need to update, start off a background task
    441             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    442             // Currently updating contacts, don't return any results.
    443             return mUpdatingDictionary;
    444         }
    445     }
    446 
    447     private void runBigramReverseLookUp(final CharSequence previousWord,
    448             final WordCallback callback) {
    449         // Search for the lowercase version of the word only, because that's where bigrams
    450         // store their sons.
    451         Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0,
    452                 previousWord.length());
    453         if (prevWord != null && prevWord.mNGrams != null) {
    454             reverseLookUp(prevWord.mNGrams, callback);
    455         }
    456     }
    457 
    458     @Override
    459     public void getBigrams(final WordComposer codes, final CharSequence previousWord,
    460             final WordCallback callback) {
    461         if (!reloadDictionaryIfRequired()) {
    462             runBigramReverseLookUp(previousWord, callback);
    463         }
    464     }
    465 
    466     /**
    467      * Used for testing purposes and in the spell checker
    468      * This function will wait for loading from database to be done
    469      */
    470     void waitForDictionaryLoading() {
    471         while (mUpdatingDictionary) {
    472             try {
    473                 Thread.sleep(100);
    474             } catch (InterruptedException e) {
    475                 //
    476             }
    477         }
    478     }
    479 
    480     protected final void blockingReloadDictionaryIfRequired() {
    481         reloadDictionaryIfRequired();
    482         waitForDictionaryLoading();
    483     }
    484 
    485     // Local to reverseLookUp, but do not allocate each time.
    486     private final char[] mLookedUpString = new char[MAX_WORD_LENGTH];
    487 
    488     /**
    489      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
    490      * through callback.
    491      * @param terminalNodes list of terminal nodes we want to add
    492      */
    493     private void reverseLookUp(LinkedList<NextWord> terminalNodes,
    494             final WordCallback callback) {
    495         Node node;
    496         int freq;
    497         for (NextWord nextWord : terminalNodes) {
    498             node = nextWord.mWord;
    499             freq = nextWord.getFrequency();
    500             int index = MAX_WORD_LENGTH;
    501             do {
    502                 --index;
    503                 mLookedUpString[index] = node.mCode;
    504                 node = node.mParent;
    505             } while (node != null);
    506 
    507             callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId,
    508                     DataType.BIGRAM);
    509         }
    510     }
    511 
    512     /**
    513      * Recursively search for the terminal node of the word.
    514      *
    515      * One iteration takes the full word to search for and the current index of the recursion.
    516      *
    517      * @param children the node of the trie to search under.
    518      * @param word the word to search for. Only read [offset..length] so there may be trailing chars
    519      * @param offset the index in {@code word} this recursion should operate on.
    520      * @param length the length of the input word.
    521      * @return Returns the terminal node of the word if the word exists
    522      */
    523     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
    524             final int length) {
    525         final int count = children.mLength;
    526         final char currentChar = word.charAt(offset);
    527         for (int j = 0; j < count; j++) {
    528             final Node node = children.mData[j];
    529             if (node.mCode == currentChar) {
    530                 if (offset == length - 1) {
    531                     if (node.mTerminal) {
    532                         return node;
    533                     }
    534                 } else {
    535                     if (node.mChildren != null) {
    536                         Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
    537                         if (returnNode != null) return returnNode;
    538                     }
    539                 }
    540             }
    541         }
    542         return null;
    543     }
    544 
    545     protected void clearDictionary() {
    546         mRoots = new NodeArray();
    547     }
    548 
    549     private class LoadDictionaryTask extends Thread {
    550         @Override
    551         public void run() {
    552             loadDictionaryAsync();
    553             synchronized (mUpdatingLock) {
    554                 mUpdatingDictionary = false;
    555             }
    556         }
    557     }
    558 
    559     private static char toLowerCase(char c) {
    560         char baseChar = c;
    561         if (c < BASE_CHARS.length) {
    562             baseChar = BASE_CHARS[c];
    563         }
    564         if (baseChar >= 'A' && baseChar <= 'Z') {
    565             return (char)(baseChar | 32);
    566         } else if (baseChar > 127) {
    567             return Character.toLowerCase(baseChar);
    568         }
    569         return baseChar;
    570     }
    571 
    572     /**
    573      * Table mapping most combined Latin, Greek, and Cyrillic characters
    574      * to their base characters.  If c is in range, BASE_CHARS[c] == c
    575      * if c is not a combined character, or the base character if it
    576      * is combined.
    577      */
    578     private static final char BASE_CHARS[] = {
    579         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    580         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    581         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    582         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    583         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    584         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    585         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    586         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    587         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    588         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    589         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    590         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    591         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    592         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    593         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    594         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    595         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    596         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    597         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    598         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    599         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    600         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    601         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    602         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
    603         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
    604         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
    605         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
    606         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
    607                                                                         // Manually changed df to 73
    608         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    609         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    610         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    611         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
    612         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
    613         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
    614         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
    615         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
    616         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
    617         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
    618         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
    619         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
    620         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
    621         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
    622         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
    623         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
    624         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
    625         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
    626         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
    627         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
    628         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    629         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    630         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    631         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    632         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    633         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
    634         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    635         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    636         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
    637         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
    638         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
    639         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
    640         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
    641         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
    642         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
    643         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
    644         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
    645         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
    646         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
    647         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
    648         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
    649         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
    650         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
    651         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    652         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    653         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    654         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    655         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    656         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    657         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    658         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    659         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    660         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    661         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    662         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    663         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    664         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    665         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    666         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
    667         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    668         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    669         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    670         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    671         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
    672         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
    673         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    674         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    675         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
    676         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    677         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    678         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    679         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    680         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    681         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    682         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    683         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    684         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
    685         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    686         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    687         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    688         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    689         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    690         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
    691         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
    692         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
    693         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
    694         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    695         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    696         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    697         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
    698         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    699         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    700         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    701         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
    702         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
    703         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    704         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    705         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    706         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
    707         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
    708         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
    709         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
    710         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    711         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    712         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    713         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    714         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    715         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    716         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    717         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    718         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    719         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
    720         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    721         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    722         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
    723         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    724         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    725         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    726         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    727         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    728         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    729         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    730         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    731         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    732         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    733         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    734         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
    735         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
    736         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
    737         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
    738         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
    739         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
    740     };
    741 
    742     // generated with:
    743     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
    744 
    745 }
    746