Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import com.android.inputmethod.latin.Dictionary.WordCallback;
     20 
     21 import android.content.Context;
     22 import android.os.AsyncTask;
     23 import android.os.SystemClock;
     24 
     25 /**
     26  * Base class for an in-memory dictionary that can grow dynamically and can
     27  * be searched for suggestions and valid words.
     28  */
     29 public class ExpandableDictionary extends Dictionary {
     30     private Context mContext;
     31     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
     32     private int mMaxDepth;
     33     private int mInputLength;
     34     private int[] mNextLettersFrequencies;
     35 
     36     public static final int MAX_WORD_LENGTH = 32;
     37     private static final char QUOTE = '\'';
     38 
     39     private boolean mRequiresReload;
     40 
     41     private boolean mUpdatingDictionary;
     42 
     43     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     44     private Object mUpdatingLock = new Object();
     45 
     46     static class Node {
     47         char code;
     48         int frequency;
     49         boolean terminal;
     50         NodeArray children;
     51     }
     52 
     53     static class NodeArray {
     54         Node[] data;
     55         int length = 0;
     56         private static final int INCREMENT = 2;
     57 
     58         NodeArray() {
     59             data = new Node[INCREMENT];
     60         }
     61 
     62         void add(Node n) {
     63             if (length + 1 > data.length) {
     64                 Node[] tempData = new Node[length + INCREMENT];
     65                 if (length > 0) {
     66                     System.arraycopy(data, 0, tempData, 0, length);
     67                 }
     68                 data = tempData;
     69             }
     70             data[length++] = n;
     71         }
     72     }
     73 
     74     private NodeArray mRoots;
     75 
     76     private int[][] mCodes;
     77 
     78     ExpandableDictionary(Context context) {
     79         mContext = context;
     80         clearDictionary();
     81         mCodes = new int[MAX_WORD_LENGTH][];
     82     }
     83 
     84     public void loadDictionary() {
     85         synchronized (mUpdatingLock) {
     86             startDictionaryLoadingTaskLocked();
     87         }
     88     }
     89 
     90     public void startDictionaryLoadingTaskLocked() {
     91         if (!mUpdatingDictionary) {
     92             mUpdatingDictionary = true;
     93             mRequiresReload = false;
     94             new LoadDictionaryTask().execute();
     95         }
     96     }
     97 
     98     public void setRequiresReload(boolean reload) {
     99         synchronized (mUpdatingLock) {
    100             mRequiresReload = reload;
    101         }
    102     }
    103 
    104     public boolean getRequiresReload() {
    105         return mRequiresReload;
    106     }
    107 
    108     /** Override to load your dictionary here, on a background thread. */
    109     public void loadDictionaryAsync() {
    110     }
    111 
    112     Context getContext() {
    113         return mContext;
    114     }
    115 
    116     int getMaxWordLength() {
    117         return MAX_WORD_LENGTH;
    118     }
    119 
    120     public void addWord(String word, int frequency) {
    121         addWordRec(mRoots, word, 0, frequency);
    122     }
    123 
    124     private void addWordRec(NodeArray children, final String word,
    125             final int depth, final int frequency) {
    126 
    127         final int wordLength = word.length();
    128         final char c = word.charAt(depth);
    129         // Does children have the current character?
    130         final int childrenLength = children.length;
    131         Node childNode = null;
    132         boolean found = false;
    133         for (int i = 0; i < childrenLength; i++) {
    134             childNode = children.data[i];
    135             if (childNode.code == c) {
    136                 found = true;
    137                 break;
    138             }
    139         }
    140         if (!found) {
    141             childNode = new Node();
    142             childNode.code = c;
    143             children.add(childNode);
    144         }
    145         if (wordLength == depth + 1) {
    146             // Terminate this word
    147             childNode.terminal = true;
    148             childNode.frequency = Math.max(frequency, childNode.frequency);
    149             if (childNode.frequency > 255) childNode.frequency = 255;
    150             return;
    151         }
    152         if (childNode.children == null) {
    153             childNode.children = new NodeArray();
    154         }
    155         addWordRec(childNode.children, word, depth + 1, frequency);
    156     }
    157 
    158     @Override
    159     public void getWords(final WordComposer codes, final WordCallback callback,
    160             int[] nextLettersFrequencies) {
    161         synchronized (mUpdatingLock) {
    162             // If we need to update, start off a background task
    163             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    164             // Currently updating contacts, don't return any results.
    165             if (mUpdatingDictionary) return;
    166         }
    167 
    168         mInputLength = codes.size();
    169         mNextLettersFrequencies = nextLettersFrequencies;
    170         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
    171         // Cache the codes so that we don't have to lookup an array list
    172         for (int i = 0; i < mInputLength; i++) {
    173             mCodes[i] = codes.getCodesAt(i);
    174         }
    175         mMaxDepth = mInputLength * 3;
    176         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
    177         for (int i = 0; i < mInputLength; i++) {
    178             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
    179         }
    180     }
    181 
    182     @Override
    183     public synchronized boolean isValidWord(CharSequence word) {
    184         synchronized (mUpdatingLock) {
    185             // If we need to update, start off a background task
    186             if (mRequiresReload) startDictionaryLoadingTaskLocked();
    187             if (mUpdatingDictionary) return false;
    188         }
    189         final int freq = getWordFrequencyRec(mRoots, word, 0, word.length());
    190         return freq > -1;
    191     }
    192 
    193     /**
    194      * Returns the word's frequency or -1 if not found
    195      */
    196     public int getWordFrequency(CharSequence word) {
    197         return getWordFrequencyRec(mRoots, word, 0, word.length());
    198     }
    199 
    200     /**
    201      * Returns the word's frequency or -1 if not found
    202      */
    203     private int getWordFrequencyRec(final NodeArray children, final CharSequence word,
    204             final int offset, final int length) {
    205         final int count = children.length;
    206         char currentChar = word.charAt(offset);
    207         for (int j = 0; j < count; j++) {
    208             final Node node = children.data[j];
    209             if (node.code == currentChar) {
    210                 if (offset == length - 1) {
    211                     if (node.terminal) {
    212                         return node.frequency;
    213                     }
    214                 } else {
    215                     if (node.children != null) {
    216                         int freq = getWordFrequencyRec(node.children, word, offset + 1, length);
    217                         if (freq > -1) return freq;
    218                     }
    219                 }
    220             }
    221         }
    222         return -1;
    223     }
    224 
    225     /**
    226      * Recursively traverse the tree for words that match the input. Input consists of
    227      * a list of arrays. Each item in the list is one input character position. An input
    228      * character is actually an array of multiple possible candidates. This function is not
    229      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
    230      * size.
    231      * @param roots node whose children have to be search for matches
    232      * @param codes the input character codes
    233      * @param word the word being composed as a possible match
    234      * @param depth the depth of traversal - the length of the word being composed thus far
    235      * @param completion whether the traversal is now in completion mode - meaning that we've
    236      * exhausted the input and we're looking for all possible suffixes.
    237      * @param snr current weight of the word being formed
    238      * @param inputIndex position in the input characters. This can be off from the depth in
    239      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
    240      * "wouldve", it could be matching "would've", so the depth will be one more than the
    241      * inputIndex
    242      * @param callback the callback class for adding a word
    243      */
    244     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
    245             final int depth, boolean completion, int snr, int inputIndex, int skipPos,
    246             WordCallback callback) {
    247         final int count = roots.length;
    248         final int codeSize = mInputLength;
    249         // Optimization: Prune out words that are too long compared to how much was typed.
    250         if (depth > mMaxDepth) {
    251             return;
    252         }
    253         int[] currentChars = null;
    254         if (codeSize <= inputIndex) {
    255             completion = true;
    256         } else {
    257             currentChars = mCodes[inputIndex];
    258         }
    259 
    260         for (int i = 0; i < count; i++) {
    261             final Node node = roots.data[i];
    262             final char c = node.code;
    263             final char lowerC = toLowerCase(c);
    264             final boolean terminal = node.terminal;
    265             final NodeArray children = node.children;
    266             final int freq = node.frequency;
    267             if (completion) {
    268                 word[depth] = c;
    269                 if (terminal) {
    270                     if (!callback.addWord(word, 0, depth + 1, freq * snr)) {
    271                         return;
    272                     }
    273                     // Add to frequency of next letters for predictive correction
    274                     if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0
    275                             && mNextLettersFrequencies.length > word[inputIndex]) {
    276                         mNextLettersFrequencies[word[inputIndex]]++;
    277                     }
    278                 }
    279                 if (children != null) {
    280                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    281                             skipPos, callback);
    282                 }
    283             } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
    284                 // Skip the ' and continue deeper
    285                 word[depth] = c;
    286                 if (children != null) {
    287                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
    288                             skipPos, callback);
    289                 }
    290             } else {
    291                 // Don't use alternatives if we're looking for missing characters
    292                 final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
    293                 for (int j = 0; j < alternativesSize; j++) {
    294                     final int addedAttenuation = (j > 0 ? 1 : 2);
    295                     final int currentChar = currentChars[j];
    296                     if (currentChar == -1) {
    297                         break;
    298                     }
    299                     if (currentChar == lowerC || currentChar == c) {
    300                         word[depth] = c;
    301 
    302                         if (codeSize == inputIndex + 1) {
    303                             if (terminal) {
    304                                 if (INCLUDE_TYPED_WORD_IF_VALID
    305                                         || !same(word, depth + 1, codes.getTypedWord())) {
    306                                     int finalFreq = freq * snr * addedAttenuation;
    307                                     if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
    308                                     callback.addWord(word, 0, depth + 1, finalFreq);
    309                                 }
    310                             }
    311                             if (children != null) {
    312                                 getWordsRec(children, codes, word, depth + 1,
    313                                         true, snr * addedAttenuation, inputIndex + 1,
    314                                         skipPos, callback);
    315                             }
    316                         } else if (children != null) {
    317                             getWordsRec(children, codes, word, depth + 1,
    318                                     false, snr * addedAttenuation, inputIndex + 1,
    319                                     skipPos, callback);
    320                         }
    321                     }
    322                 }
    323             }
    324         }
    325     }
    326 
    327     protected void clearDictionary() {
    328         mRoots = new NodeArray();
    329     }
    330 
    331     private class LoadDictionaryTask extends AsyncTask<Void, Void, Void> {
    332         @Override
    333         protected Void doInBackground(Void... v) {
    334             loadDictionaryAsync();
    335             return null;
    336         }
    337 
    338         @Override
    339         protected void onPostExecute(Void result) {
    340             // TODO Auto-generated method stub
    341             synchronized (mUpdatingLock) {
    342                 mUpdatingDictionary = false;
    343             }
    344             super.onPostExecute(result);
    345         }
    346 
    347     }
    348 
    349     static char toLowerCase(char c) {
    350         if (c < BASE_CHARS.length) {
    351             c = BASE_CHARS[c];
    352         }
    353         if (c >= 'A' && c <= 'Z') {
    354             c = (char) (c | 32);
    355         } else if (c > 127) {
    356             c = Character.toLowerCase(c);
    357         }
    358         return c;
    359     }
    360 
    361     /**
    362      * Table mapping most combined Latin, Greek, and Cyrillic characters
    363      * to their base characters.  If c is in range, BASE_CHARS[c] == c
    364      * if c is not a combined character, or the base character if it
    365      * is combined.
    366      */
    367     static final char BASE_CHARS[] = {
    368         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    369         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    370         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    371         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    372         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    373         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    374         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    375         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    376         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    377         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    378         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    379         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    380         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    381         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    382         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    383         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    384         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    385         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    386         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    387         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    388         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    389         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    390         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    391         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
    392         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
    393         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
    394         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
    395         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
    396                                                                         // Manually changed df to 73
    397         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    398         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    399         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    400         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
    401         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
    402         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
    403         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
    404         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
    405         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
    406         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
    407         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
    408         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
    409         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
    410         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
    411         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
    412         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
    413         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
    414         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
    415         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
    416         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
    417         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    418         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    419         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    420         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    421         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    422         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
    423         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    424         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    425         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
    426         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
    427         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
    428         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
    429         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
    430         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
    431         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
    432         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
    433         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
    434         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
    435         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
    436         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
    437         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
    438         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
    439         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
    440         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    441         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    442         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    443         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    444         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    445         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    446         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    447         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    448         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    449         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    450         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    451         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    452         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    453         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    454         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    455         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
    456         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    457         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    458         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    459         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    460         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
    461         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
    462         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    463         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    464         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
    465         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    466         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    467         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    468         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    469         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    470         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    471         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    472         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    473         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
    474         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    475         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    476         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    477         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    478         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    479         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
    480         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
    481         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
    482         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
    483         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    484         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    485         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    486         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
    487         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    488         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    489         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    490         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
    491         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
    492         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    493         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    494         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    495         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
    496         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
    497         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
    498         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
    499         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    500         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    501         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    502         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    503         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    504         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    505         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    506         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    507         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    508         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
    509         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    510         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    511         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
    512         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    513         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    514         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    515         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    516         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    517         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    518         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    519         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    520         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    521         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    522         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    523         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
    524         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
    525         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
    526         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
    527         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
    528         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
    529     };
    530 
    531     // generated with:
    532     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
    533 
    534 }
    535