Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import java.util.LinkedList;
     20 
     21 import android.content.Context;
     22 import android.os.AsyncTask;
     23 
     24 /**
     25  * Base class for an in-memory dictionary that can grow dynamically and can
     26  * be searched for suggestions and valid words.
     27  */
     28 public class ExpandableDictionary extends Dictionary {
     29     /**
     30      * There is difference between what java and native code can handle.
     31      * It uses 32 because Java stack overflows when greater value is used.
     32      */
     33     protected static final int MAX_WORD_LENGTH = 32;
     34 
     35     private Context mContext;
     36     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
     37     private int mDicTypeId;
     38     private int mMaxDepth;
     39     private int mInputLength;
     40     private int[] mNextLettersFrequencies;
     41     private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
     42 
     43     private static final char QUOTE = '\'';
     44 
     45     private boolean mRequiresReload;
     46 
     47     private boolean mUpdatingDictionary;
     48 
     49     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     50     private Object mUpdatingLock = new Object();
     51 
     52     static class Node {
     53         char code;
     54         int frequency;
     55         boolean terminal;
     56         Node parent;
     57         NodeArray children;
     58         LinkedList<NextWord> ngrams; // Supports ngram
     59     }
     60 
     61     static class NodeArray {
     62         Node[] data;
     63         int length = 0;
     64         private static final int INCREMENT = 2;
     65 
     66         NodeArray() {
     67             data = new Node[INCREMENT];
     68         }
     69 
     70         void add(Node n) {
     71             if (length + 1 > data.length) {
     72                 Node[] tempData = new Node[length + INCREMENT];
     73                 if (length > 0) {
     74                     System.arraycopy(data, 0, tempData, 0, length);
     75                 }
     76                 data = tempData;
     77             }
     78             data[length++] = n;
     79         }
     80     }
     81 
     82     static class NextWord {
     83         Node word;
     84         NextWord nextWord;
     85         int frequency;
     86 
     87         NextWord(Node word, int frequency) {
     88             this.word = word;
     89             this.frequency = frequency;
     90         }
     91     }
     92 
     93 
     94     private NodeArray mRoots;
     95 
     96     private int[][] mCodes;
     97 
     98     ExpandableDictionary(Context context, int dicTypeId) {
     99         mContext = context;
    100         clearDictionary();
    101         mCodes = new int[MAX_WORD_LENGTH][];
    102         mDicTypeId = dicTypeId;
    103     }
    104 
    105     public void loadDictionary() {
    106         synchronized (mUpdatingLock) {
    107             startDictionaryLoadingTaskLocked();
    108         }
    109     }
    110 
    111     public void startDictionaryLoadingTaskLocked() {
    112         if (!mUpdatingDictionary) {
    113             mUpdatingDictionary = true;
    114             mRequiresReload = false;
    115             new LoadDictionaryTask().execute();
    116         }
    117     }
    118 
    119     public void setRequiresReload(boolean reload) {
    120         synchronized (mUpdatingLock) {
    121             mRequiresReload = reload;
    122         }
    123     }
    124 
    125     public boolean getRequiresReload() {
    126         return mRequiresReload;
    127     }
    128 
    129     /** Override to load your dictionary here, on a background thread. */
    130     public void loadDictionaryAsync() {
    131     }
    132 
    133     Context getContext() {
    134         return mContext;
    135     }
    136 
    137     int getMaxWordLength() {
    138         return MAX_WORD_LENGTH;
    139     }
    140 
    141     public void addWord(String word, int frequency) {
    142         addWordRec(mRoots, word, 0, frequency, null);
    143     }
    144 
    145     private void addWordRec(NodeArray children, final String word, final int depth,
    146             final int frequency, Node parentNode) {
    147         final int wordLength = word.length();
    148         final char c = word.charAt(depth);
    149         // Does children have the current character?
    150         final int childrenLength = children.length;
    151         Node childNode = null;
    152         boolean found = false;
    153         for (int i = 0; i < childrenLength; i++) {
    154             childNode = children.data[i];
    155             if (childNode.code == c) {
    156                 found = true;
    157                 break;
    158             }
    159         }
    160         if (!found) {
    161             childNode = new Node();
    162             childNode.code = c;
    163             childNode.parent = parentNode;
    164             children.add(childNode);
    165         }
    166         if (wordLength == depth + 1) {
    167             // Terminate this word
    168             childNode.terminal = true;
    169             childNode.frequency = Math.max(frequency, childNode.frequency);
    170             if (childNode.frequency > 255) childNode.frequency = 255;
    171             return;
    172         }
    173         if (childNode.children == null) {
    174             childNode.children = new NodeArray();
    175         }
    176         addWordRec(childNode.children, word, depth + 1, frequency, childNode);
    177     }
    178 
    179     @Override
    180     public void getWords(final WordComposer codes, final WordCallback callback,
    181             int[] nextLettersFrequencies) {
    182         synchronized (mUpdatingLock) {
    183             // If we need to update, start off a background task
    184             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    185             // Currently updating contacts, don't return any results.
    186             if (mUpdatingDictionary) return;
    187         }
    188 
    189         mInputLength = codes.size();
    190         mNextLettersFrequencies = nextLettersFrequencies;
    191         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
    192         // Cache the codes so that we don't have to lookup an array list
    193         for (int i = 0; i < mInputLength; i++) {
    194             mCodes[i] = codes.getCodesAt(i);
    195         }
    196         mMaxDepth = mInputLength * 3;
    197         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
    198         for (int i = 0; i < mInputLength; i++) {
    199             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
    200         }
    201     }
    202 
    203     @Override
    204     public synchronized boolean isValidWord(CharSequence word) {
    205         synchronized (mUpdatingLock) {
    206             // If we need to update, start off a background task
    207             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    208             if (mUpdatingDictionary) return false;
    209         }
    210         final int freq = getWordFrequency(word);
    211         return freq > -1;
    212     }
    213 
    214     /**
    215      * Returns the word's frequency or -1 if not found
    216      */
    217     public int getWordFrequency(CharSequence word) {
    218         Node node = searchNode(mRoots, word, 0, word.length());
    219         return (node == null) ? -1 : node.frequency;
    220     }
    221 
    222     /**
    223      * Recursively traverse the tree for words that match the input. Input consists of
    224      * a list of arrays. Each item in the list is one input character position. An input
    225      * character is actually an array of multiple possible candidates. This function is not
    226      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
    227      * size.
    228      * @param roots node whose children have to be search for matches
    229      * @param codes the input character codes
    230      * @param word the word being composed as a possible match
    231      * @param depth the depth of traversal - the length of the word being composed thus far
    232      * @param completion whether the traversal is now in completion mode - meaning that we've
    233      * exhausted the input and we're looking for all possible suffixes.
    234      * @param snr current weight of the word being formed
    235      * @param inputIndex position in the input characters. This can be off from the depth in
    236      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
    237      * "wouldve", it could be matching "would've", so the depth will be one more than the
    238      * inputIndex
    239      * @param callback the callback class for adding a word
    240      */
    241     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
    242             final int depth, boolean completion, int snr, int inputIndex, int skipPos,
    243             WordCallback callback) {
    244         final int count = roots.length;
    245         final int codeSize = mInputLength;
    246         // Optimization: Prune out words that are too long compared to how much was typed.
    247         if (depth > mMaxDepth) {
    248             return;
    249         }
    250         int[] currentChars = null;
    251         if (codeSize <= inputIndex) {
    252             completion = true;
    253         } else {
    254             currentChars = mCodes[inputIndex];
    255         }
    256 
    257         for (int i = 0; i < count; i++) {
    258             final Node node = roots.data[i];
    259             final char c = node.code;
    260             final char lowerC = toLowerCase(c);
    261             final boolean terminal = node.terminal;
    262             final NodeArray children = node.children;
    263             final int freq = node.frequency;
    264             if (completion) {
    265                 word[depth] = c;
    266                 if (terminal) {
    267                     if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
    268                                 DataType.UNIGRAM)) {
    269                         return;
    270                     }
    271                     // Add to frequency of next letters for predictive correction
    272                     if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0
    273                             && mNextLettersFrequencies.length > word[inputIndex]) {
    274                         mNextLettersFrequencies[word[inputIndex]]++;
    275                     }
    276                 }
    277                 if (children != null) {
    278                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    279                             skipPos, callback);
    280                 }
    281             } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
    282                 // Skip the ' and continue deeper
    283                 word[depth] = c;
    284                 if (children != null) {
    285                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    286                             skipPos, callback);
    287                 }
    288             } else {
    289                 // Don't use alternatives if we're looking for missing characters
    290                 final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
    291                 for (int j = 0; j < alternativesSize; j++) {
    292                     final int addedAttenuation = (j > 0 ? 1 : 2);
    293                     final int currentChar = currentChars[j];
    294                     if (currentChar == -1) {
    295                         break;
    296                     }
    297                     if (currentChar == lowerC || currentChar == c) {
    298                         word[depth] = c;
    299 
    300                         if (codeSize == inputIndex + 1) {
    301                             if (terminal) {
    302                                 if (INCLUDE_TYPED_WORD_IF_VALID
    303                                         || !same(word, depth + 1, codes.getTypedWord())) {
    304                                     int finalFreq = freq * snr * addedAttenuation;
    305                                     if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
    306                                     callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
    307                                             DataType.UNIGRAM);
    308                                 }
    309                             }
    310                             if (children != null) {
    311                                 getWordsRec(children, codes, word, depth + 1,
    312                                         true, snr * addedAttenuation, inputIndex + 1,
    313                                         skipPos, callback);
    314                             }
    315                         } else if (children != null) {
    316                             getWordsRec(children, codes, word, depth + 1,
    317                                     false, snr * addedAttenuation, inputIndex + 1,
    318                                     skipPos, callback);
    319                         }
    320                     }
    321                 }
    322             }
    323         }
    324     }
    325 
    326     protected int setBigram(String word1, String word2, int frequency) {
    327         return addOrSetBigram(word1, word2, frequency, false);
    328     }
    329 
    330     protected int addBigram(String word1, String word2, int frequency) {
    331         return addOrSetBigram(word1, word2, frequency, true);
    332     }
    333 
    334     /**
    335      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
    336      * @param frequency frequency for this bigrams
    337      * @param addFrequency if true, it adds to current frequency
    338      * @return returns the final frequency
    339      */
    340     private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
    341         Node firstWord = searchWord(mRoots, word1, 0, null);
    342         Node secondWord = searchWord(mRoots, word2, 0, null);
    343         LinkedList<NextWord> bigram = firstWord.ngrams;
    344         if (bigram == null || bigram.size() == 0) {
    345             firstWord.ngrams = new LinkedList<NextWord>();
    346             bigram = firstWord.ngrams;
    347         } else {
    348             for (NextWord nw : bigram) {
    349                 if (nw.word == secondWord) {
    350                     if (addFrequency) {
    351                         nw.frequency += frequency;
    352                     } else {
    353                         nw.frequency = frequency;
    354                     }
    355                     return nw.frequency;
    356                 }
    357             }
    358         }
    359         NextWord nw = new NextWord(secondWord, frequency);
    360         firstWord.ngrams.add(nw);
    361         return frequency;
    362     }
    363 
    364     /**
    365      * Searches for the word and add the word if it does not exist.
    366      * @return Returns the terminal node of the word we are searching for.
    367      */
    368     private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
    369         final int wordLength = word.length();
    370         final char c = word.charAt(depth);
    371         // Does children have the current character?
    372         final int childrenLength = children.length;
    373         Node childNode = null;
    374         boolean found = false;
    375         for (int i = 0; i < childrenLength; i++) {
    376             childNode = children.data[i];
    377             if (childNode.code == c) {
    378                 found = true;
    379                 break;
    380             }
    381         }
    382         if (!found) {
    383             childNode = new Node();
    384             childNode.code = c;
    385             childNode.parent = parentNode;
    386             children.add(childNode);
    387         }
    388         if (wordLength == depth + 1) {
    389             // Terminate this word
    390             childNode.terminal = true;
    391             return childNode;
    392         }
    393         if (childNode.children == null) {
    394             childNode.children = new NodeArray();
    395         }
    396         return searchWord(childNode.children, word, depth + 1, childNode);
    397     }
    398 
    399     // @VisibleForTesting
    400     boolean reloadDictionaryIfRequired() {
    401         synchronized (mUpdatingLock) {
    402             // If we need to update, start off a background task
    403             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    404             // Currently updating contacts, don't return any results.
    405             return mUpdatingDictionary;
    406         }
    407     }
    408 
    409     private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) {
    410         Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
    411         if (prevWord != null && prevWord.ngrams != null) {
    412             reverseLookUp(prevWord.ngrams, callback);
    413         }
    414     }
    415 
    416     @Override
    417     public void getBigrams(final WordComposer codes, final CharSequence previousWord,
    418             final WordCallback callback, int[] nextLettersFrequencies) {
    419         if (!reloadDictionaryIfRequired()) {
    420             runReverseLookUp(previousWord, callback);
    421         }
    422     }
    423 
    424     /**
    425      * Used only for testing purposes
    426      * This function will wait for loading from database to be done
    427      */
    428     void waitForDictionaryLoading() {
    429         while (mUpdatingDictionary) {
    430             try {
    431                 Thread.sleep(100);
    432             } catch (InterruptedException e) {
    433             }
    434         }
    435     }
    436 
    437     /**
    438      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
    439      * through callback.
    440      * @param terminalNodes list of terminal nodes we want to add
    441      */
    442     private void reverseLookUp(LinkedList<NextWord> terminalNodes,
    443             final WordCallback callback) {
    444         Node node;
    445         int freq;
    446         for (NextWord nextWord : terminalNodes) {
    447             node = nextWord.word;
    448             freq = nextWord.frequency;
    449             // TODO Not the best way to limit suggestion threshold
    450             if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) {
    451                 sb.setLength(0);
    452                 do {
    453                     sb.insert(0, node.code);
    454                     node = node.parent;
    455                 } while(node != null);
    456 
    457                 // TODO better way to feed char array?
    458                 callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
    459                         DataType.BIGRAM);
    460             }
    461         }
    462     }
    463 
    464     /**
    465      * Search for the terminal node of the word
    466      * @return Returns the terminal node of the word if the word exists
    467      */
    468     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
    469             final int length) {
    470         // TODO Consider combining with addWordRec
    471         final int count = children.length;
    472         char currentChar = word.charAt(offset);
    473         for (int j = 0; j < count; j++) {
    474             final Node node = children.data[j];
    475             if (node.code == currentChar) {
    476                 if (offset == length - 1) {
    477                     if (node.terminal) {
    478                         return node;
    479                     }
    480                 } else {
    481                     if (node.children != null) {
    482                         Node returnNode = searchNode(node.children, word, offset + 1, length);
    483                         if (returnNode != null) return returnNode;
    484                     }
    485                 }
    486             }
    487         }
    488         return null;
    489     }
    490 
    491     protected void clearDictionary() {
    492         mRoots = new NodeArray();
    493     }
    494 
    495     private class LoadDictionaryTask extends AsyncTask<Void, Void, Void> {
    496         @Override
    497         protected Void doInBackground(Void... v) {
    498             loadDictionaryAsync();
    499             synchronized (mUpdatingLock) {
    500                 mUpdatingDictionary = false;
    501             }
    502             return null;
    503         }
    504     }
    505 
    506     static char toLowerCase(char c) {
    507         if (c < BASE_CHARS.length) {
    508             c = BASE_CHARS[c];
    509         }
    510         if (c >= 'A' && c <= 'Z') {
    511             c = (char) (c | 32);
    512         } else if (c > 127) {
    513             c = Character.toLowerCase(c);
    514         }
    515         return c;
    516     }
    517 
    518     /**
    519      * Table mapping most combined Latin, Greek, and Cyrillic characters
    520      * to their base characters.  If c is in range, BASE_CHARS[c] == c
    521      * if c is not a combined character, or the base character if it
    522      * is combined.
    523      */
    524     static final char BASE_CHARS[] = {
    525         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    526         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    527         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    528         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    529         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    530         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    531         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    532         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    533         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    534         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    535         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    536         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    537         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    538         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    539         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    540         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    541         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    542         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    543         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    544         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    545         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    546         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    547         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    548         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
    549         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
    550         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
    551         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
    552         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
    553                                                                         // Manually changed df to 73
    554         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    555         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    556         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    557         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
    558         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
    559         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
    560         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
    561         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
    562         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
    563         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
    564         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
    565         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
    566         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
    567         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
    568         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
    569         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
    570         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
    571         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
    572         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
    573         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
    574         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    575         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    576         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    577         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    578         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    579         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
    580         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    581         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    582         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
    583         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
    584         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
    585         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
    586         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
    587         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
    588         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
    589         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
    590         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
    591         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
    592         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
    593         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
    594         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
    595         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
    596         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
    597         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    598         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    599         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    600         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    601         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    602         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    603         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    604         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    605         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    606         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    607         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    608         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    609         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    610         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    611         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    612         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
    613         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    614         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    615         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    616         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    617         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
    618         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
    619         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    620         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    621         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
    622         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    623         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    624         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    625         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    626         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    627         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    628         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    629         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    630         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
    631         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    632         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    633         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    634         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    635         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    636         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
    637         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
    638         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
    639         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
    640         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    641         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    642         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    643         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
    644         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    645         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    646         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    647         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
    648         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
    649         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    650         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    651         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    652         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
    653         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
    654         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
    655         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
    656         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    657         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    658         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    659         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    660         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    661         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    662         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    663         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    664         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    665         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
    666         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    667         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    668         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
    669         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    670         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    671         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    672         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    673         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    674         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    675         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    676         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    677         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    678         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    679         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    680         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
    681         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
    682         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
    683         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
    684         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
    685         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
    686     };
    687 
    688     // generated with:
    689     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
    690 
    691 }
    692