1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import com.android.inputmethod.latin.Dictionary.WordCallback; 20 21 import android.content.Context; 22 import android.os.AsyncTask; 23 import android.os.SystemClock; 24 25 /** 26 * Base class for an in-memory dictionary that can grow dynamically and can 27 * be searched for suggestions and valid words. 28 */ 29 public class ExpandableDictionary extends Dictionary { 30 private Context mContext; 31 private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; 32 private int mMaxDepth; 33 private int mInputLength; 34 private int[] mNextLettersFrequencies; 35 36 public static final int MAX_WORD_LENGTH = 32; 37 private static final char QUOTE = '\''; 38 39 private boolean mRequiresReload; 40 41 private boolean mUpdatingDictionary; 42 43 // Use this lock before touching mUpdatingDictionary & mRequiresDownload 44 private Object mUpdatingLock = new Object(); 45 46 static class Node { 47 char code; 48 int frequency; 49 boolean terminal; 50 NodeArray children; 51 } 52 53 static class NodeArray { 54 Node[] data; 55 int length = 0; 56 private static final int INCREMENT = 2; 57 58 NodeArray() { 59 data = new Node[INCREMENT]; 60 } 61 62 void add(Node n) { 63 if (length + 1 > data.length) { 64 Node[] tempData = new Node[length + INCREMENT]; 65 if (length > 0) { 66 System.arraycopy(data, 0, tempData, 0, length); 67 } 68 data = tempData; 69 } 70 data[length++] = n; 71 } 72 } 73 74 private NodeArray mRoots; 75 76 private int[][] mCodes; 77 78 ExpandableDictionary(Context context) { 79 mContext = context; 80 clearDictionary(); 81 mCodes = new int[MAX_WORD_LENGTH][]; 82 } 83 84 public void loadDictionary() { 85 synchronized (mUpdatingLock) { 86 startDictionaryLoadingTaskLocked(); 87 } 88 } 89 90 public void startDictionaryLoadingTaskLocked() { 91 if (!mUpdatingDictionary) { 92 mUpdatingDictionary = true; 93 mRequiresReload = false; 94 new LoadDictionaryTask().execute(); 95 } 96 } 97 98 public void setRequiresReload(boolean reload) { 99 synchronized (mUpdatingLock) { 100 mRequiresReload = reload; 101 } 102 } 103 104 public boolean getRequiresReload() { 105 return mRequiresReload; 106 } 107 108 /** Override to load your dictionary here, on a background thread. */ 109 public void loadDictionaryAsync() { 110 } 111 112 Context getContext() { 113 return mContext; 114 } 115 116 int getMaxWordLength() { 117 return MAX_WORD_LENGTH; 118 } 119 120 public void addWord(String word, int frequency) { 121 addWordRec(mRoots, word, 0, frequency); 122 } 123 124 private void addWordRec(NodeArray children, final String word, 125 final int depth, final int frequency) { 126 127 final int wordLength = word.length(); 128 final char c = word.charAt(depth); 129 // Does children have the current character? 130 final int childrenLength = children.length; 131 Node childNode = null; 132 boolean found = false; 133 for (int i = 0; i < childrenLength; i++) { 134 childNode = children.data[i]; 135 if (childNode.code == c) { 136 found = true; 137 break; 138 } 139 } 140 if (!found) { 141 childNode = new Node(); 142 childNode.code = c; 143 children.add(childNode); 144 } 145 if (wordLength == depth + 1) { 146 // Terminate this word 147 childNode.terminal = true; 148 childNode.frequency = Math.max(frequency, childNode.frequency); 149 if (childNode.frequency > 255) childNode.frequency = 255; 150 return; 151 } 152 if (childNode.children == null) { 153 childNode.children = new NodeArray(); 154 } 155 addWordRec(childNode.children, word, depth + 1, frequency); 156 } 157 158 @Override 159 public void getWords(final WordComposer codes, final WordCallback callback, 160 int[] nextLettersFrequencies) { 161 synchronized (mUpdatingLock) { 162 // If we need to update, start off a background task 163 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 164 // Currently updating contacts, don't return any results. 165 if (mUpdatingDictionary) return; 166 } 167 168 mInputLength = codes.size(); 169 mNextLettersFrequencies = nextLettersFrequencies; 170 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 171 // Cache the codes so that we don't have to lookup an array list 172 for (int i = 0; i < mInputLength; i++) { 173 mCodes[i] = codes.getCodesAt(i); 174 } 175 mMaxDepth = mInputLength * 3; 176 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback); 177 for (int i = 0; i < mInputLength; i++) { 178 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback); 179 } 180 } 181 182 @Override 183 public synchronized boolean isValidWord(CharSequence word) { 184 synchronized (mUpdatingLock) { 185 // If we need to update, start off a background task 186 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 187 if (mUpdatingDictionary) return false; 188 } 189 final int freq = getWordFrequencyRec(mRoots, word, 0, word.length()); 190 return freq > -1; 191 } 192 193 /** 194 * Returns the word's frequency or -1 if not found 195 */ 196 public int getWordFrequency(CharSequence word) { 197 return getWordFrequencyRec(mRoots, word, 0, word.length()); 198 } 199 200 /** 201 * Returns the word's frequency or -1 if not found 202 */ 203 private int getWordFrequencyRec(final NodeArray children, final CharSequence word, 204 final int offset, final int length) { 205 final int count = children.length; 206 char currentChar = word.charAt(offset); 207 for (int j = 0; j < count; j++) { 208 final Node node = children.data[j]; 209 if (node.code == currentChar) { 210 if (offset == length - 1) { 211 if (node.terminal) { 212 return node.frequency; 213 } 214 } else { 215 if (node.children != null) { 216 int freq = getWordFrequencyRec(node.children, word, offset + 1, length); 217 if (freq > -1) return freq; 218 } 219 } 220 } 221 } 222 return -1; 223 } 224 225 /** 226 * Recursively traverse the tree for words that match the input. Input consists of 227 * a list of arrays. Each item in the list is one input character position. An input 228 * character is actually an array of multiple possible candidates. This function is not 229 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 230 * size. 231 * @param roots node whose children have to be search for matches 232 * @param codes the input character codes 233 * @param word the word being composed as a possible match 234 * @param depth the depth of traversal - the length of the word being composed thus far 235 * @param completion whether the traversal is now in completion mode - meaning that we've 236 * exhausted the input and we're looking for all possible suffixes. 237 * @param snr current weight of the word being formed 238 * @param inputIndex position in the input characters. This can be off from the depth in 239 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 240 * "wouldve", it could be matching "would've", so the depth will be one more than the 241 * inputIndex 242 * @param callback the callback class for adding a word 243 */ 244 protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 245 final int depth, boolean completion, int snr, int inputIndex, int skipPos, 246 WordCallback callback) { 247 final int count = roots.length; 248 final int codeSize = mInputLength; 249 // Optimization: Prune out words that are too long compared to how much was typed. 250 if (depth > mMaxDepth) { 251 return; 252 } 253 int[] currentChars = null; 254 if (codeSize <= inputIndex) { 255 completion = true; 256 } else { 257 currentChars = mCodes[inputIndex]; 258 } 259 260 for (int i = 0; i < count; i++) { 261 final Node node = roots.data[i]; 262 final char c = node.code; 263 final char lowerC = toLowerCase(c); 264 final boolean terminal = node.terminal; 265 final NodeArray children = node.children; 266 final int freq = node.frequency; 267 if (completion) { 268 word[depth] = c; 269 if (terminal) { 270 if (!callback.addWord(word, 0, depth + 1, freq * snr)) { 271 return; 272 } 273 // Add to frequency of next letters for predictive correction 274 if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0 275 && mNextLettersFrequencies.length > word[inputIndex]) { 276 mNextLettersFrequencies[word[inputIndex]]++; 277 } 278 } 279 if (children != null) { 280 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 281 skipPos, callback); 282 } 283 } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) { 284 // Skip the ' and continue deeper 285 word[depth] = c; 286 if (children != null) { 287 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 288 skipPos, callback); 289 } 290 } else { 291 // Don't use alternatives if we're looking for missing characters 292 final int alternativesSize = skipPos >= 0? 1 : currentChars.length; 293 for (int j = 0; j < alternativesSize; j++) { 294 final int addedAttenuation = (j > 0 ? 1 : 2); 295 final int currentChar = currentChars[j]; 296 if (currentChar == -1) { 297 break; 298 } 299 if (currentChar == lowerC || currentChar == c) { 300 word[depth] = c; 301 302 if (codeSize == inputIndex + 1) { 303 if (terminal) { 304 if (INCLUDE_TYPED_WORD_IF_VALID 305 || !same(word, depth + 1, codes.getTypedWord())) { 306 int finalFreq = freq * snr * addedAttenuation; 307 if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; 308 callback.addWord(word, 0, depth + 1, finalFreq); 309 } 310 } 311 if (children != null) { 312 getWordsRec(children, codes, word, depth + 1, 313 true, snr * addedAttenuation, inputIndex + 1, 314 skipPos, callback); 315 } 316 } else if (children != null) { 317 getWordsRec(children, codes, word, depth + 1, 318 false, snr * addedAttenuation, inputIndex + 1, 319 skipPos, callback); 320 } 321 } 322 } 323 } 324 } 325 } 326 327 protected void clearDictionary() { 328 mRoots = new NodeArray(); 329 } 330 331 private class LoadDictionaryTask extends AsyncTask<Void, Void, Void> { 332 @Override 333 protected Void doInBackground(Void... v) { 334 loadDictionaryAsync(); 335 return null; 336 } 337 338 @Override 339 protected void onPostExecute(Void result) { 340 // TODO Auto-generated method stub 341 synchronized (mUpdatingLock) { 342 mUpdatingDictionary = false; 343 } 344 super.onPostExecute(result); 345 } 346 347 } 348 349 static char toLowerCase(char c) { 350 if (c < BASE_CHARS.length) { 351 c = BASE_CHARS[c]; 352 } 353 if (c >= 'A' && c <= 'Z') { 354 c = (char) (c | 32); 355 } else if (c > 127) { 356 c = Character.toLowerCase(c); 357 } 358 return c; 359 } 360 361 /** 362 * Table mapping most combined Latin, Greek, and Cyrillic characters 363 * to their base characters. If c is in range, BASE_CHARS[c] == c 364 * if c is not a combined character, or the base character if it 365 * is combined. 366 */ 367 static final char BASE_CHARS[] = { 368 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 369 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 370 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 371 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 372 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 373 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 374 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 375 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 376 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 377 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 378 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 379 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 380 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 381 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 382 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 383 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 384 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 385 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 386 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 387 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 388 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 389 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 390 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 391 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 392 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 393 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 394 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 395 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 396 // Manually changed df to 73 397 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 398 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 399 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 400 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 401 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 402 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 403 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 404 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 405 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 406 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 407 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 408 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 409 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 410 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 411 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 412 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 413 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 414 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 415 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 416 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 417 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 418 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 419 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 420 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 421 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 422 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 423 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 424 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 425 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 426 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 427 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 428 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 429 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 430 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 431 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 432 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 433 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 434 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 435 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 436 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 437 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 438 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 439 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 440 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 441 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 442 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 443 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 444 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 445 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 446 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 447 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 448 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 449 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 450 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 451 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 452 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 453 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 454 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 455 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 456 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 457 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 458 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 459 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 460 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 461 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 462 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 463 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 464 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 465 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 466 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 467 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 468 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 469 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 470 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 471 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 472 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 473 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 474 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 475 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 476 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 477 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 478 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 479 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 480 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 481 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 482 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 483 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 484 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 485 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 486 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 487 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 488 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 489 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 490 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 491 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 492 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 493 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 494 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 495 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 496 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 497 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 498 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 499 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 500 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 501 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 502 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 503 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 504 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 505 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 506 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 507 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 508 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 509 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 510 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 511 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 512 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 513 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 514 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 515 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 516 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 517 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 518 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 519 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 520 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 521 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 522 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 523 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 524 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 525 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 526 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 527 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 528 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 529 }; 530 531 // generated with: 532 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 533 534 } 535