1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import java.util.LinkedList; 20 21 import android.content.Context; 22 import android.os.AsyncTask; 23 24 /** 25 * Base class for an in-memory dictionary that can grow dynamically and can 26 * be searched for suggestions and valid words. 27 */ 28 public class ExpandableDictionary extends Dictionary { 29 /** 30 * There is difference between what java and native code can handle. 31 * It uses 32 because Java stack overflows when greater value is used. 32 */ 33 protected static final int MAX_WORD_LENGTH = 32; 34 35 private Context mContext; 36 private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; 37 private int mDicTypeId; 38 private int mMaxDepth; 39 private int mInputLength; 40 private int[] mNextLettersFrequencies; 41 private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH); 42 43 private static final char QUOTE = '\''; 44 45 private boolean mRequiresReload; 46 47 private boolean mUpdatingDictionary; 48 49 // Use this lock before touching mUpdatingDictionary & mRequiresDownload 50 private Object mUpdatingLock = new Object(); 51 52 static class Node { 53 char code; 54 int frequency; 55 boolean terminal; 56 Node parent; 57 NodeArray children; 58 LinkedList<NextWord> ngrams; // Supports ngram 59 } 60 61 static class NodeArray { 62 Node[] data; 63 int length = 0; 64 private static final int INCREMENT = 2; 65 66 NodeArray() { 67 data = new Node[INCREMENT]; 68 } 69 70 void add(Node n) { 71 if (length + 1 > data.length) { 72 Node[] tempData = new Node[length + INCREMENT]; 73 if (length > 0) { 74 System.arraycopy(data, 0, tempData, 0, length); 75 } 76 data = tempData; 77 } 78 data[length++] = n; 79 } 80 } 81 82 static class NextWord { 83 Node word; 84 NextWord nextWord; 85 int frequency; 86 87 NextWord(Node word, int frequency) { 88 this.word = word; 89 this.frequency = frequency; 90 } 91 } 92 93 94 private NodeArray mRoots; 95 96 private int[][] mCodes; 97 98 ExpandableDictionary(Context context, int dicTypeId) { 99 mContext = context; 100 clearDictionary(); 101 mCodes = new int[MAX_WORD_LENGTH][]; 102 mDicTypeId = dicTypeId; 103 } 104 105 public void loadDictionary() { 106 synchronized (mUpdatingLock) { 107 startDictionaryLoadingTaskLocked(); 108 } 109 } 110 111 public void startDictionaryLoadingTaskLocked() { 112 if (!mUpdatingDictionary) { 113 mUpdatingDictionary = true; 114 mRequiresReload = false; 115 new LoadDictionaryTask().execute(); 116 } 117 } 118 119 public void setRequiresReload(boolean reload) { 120 synchronized (mUpdatingLock) { 121 mRequiresReload = reload; 122 } 123 } 124 125 public boolean getRequiresReload() { 126 return mRequiresReload; 127 } 128 129 /** Override to load your dictionary here, on a background thread. */ 130 public void loadDictionaryAsync() { 131 } 132 133 Context getContext() { 134 return mContext; 135 } 136 137 int getMaxWordLength() { 138 return MAX_WORD_LENGTH; 139 } 140 141 public void addWord(String word, int frequency) { 142 addWordRec(mRoots, word, 0, frequency, null); 143 } 144 145 private void addWordRec(NodeArray children, final String word, final int depth, 146 final int frequency, Node parentNode) { 147 final int wordLength = word.length(); 148 final char c = word.charAt(depth); 149 // Does children have the current character? 150 final int childrenLength = children.length; 151 Node childNode = null; 152 boolean found = false; 153 for (int i = 0; i < childrenLength; i++) { 154 childNode = children.data[i]; 155 if (childNode.code == c) { 156 found = true; 157 break; 158 } 159 } 160 if (!found) { 161 childNode = new Node(); 162 childNode.code = c; 163 childNode.parent = parentNode; 164 children.add(childNode); 165 } 166 if (wordLength == depth + 1) { 167 // Terminate this word 168 childNode.terminal = true; 169 childNode.frequency = Math.max(frequency, childNode.frequency); 170 if (childNode.frequency > 255) childNode.frequency = 255; 171 return; 172 } 173 if (childNode.children == null) { 174 childNode.children = new NodeArray(); 175 } 176 addWordRec(childNode.children, word, depth + 1, frequency, childNode); 177 } 178 179 @Override 180 public void getWords(final WordComposer codes, final WordCallback callback, 181 int[] nextLettersFrequencies) { 182 synchronized (mUpdatingLock) { 183 // If we need to update, start off a background task 184 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 185 // Currently updating contacts, don't return any results. 186 if (mUpdatingDictionary) return; 187 } 188 189 mInputLength = codes.size(); 190 mNextLettersFrequencies = nextLettersFrequencies; 191 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 192 // Cache the codes so that we don't have to lookup an array list 193 for (int i = 0; i < mInputLength; i++) { 194 mCodes[i] = codes.getCodesAt(i); 195 } 196 mMaxDepth = mInputLength * 3; 197 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback); 198 for (int i = 0; i < mInputLength; i++) { 199 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback); 200 } 201 } 202 203 @Override 204 public synchronized boolean isValidWord(CharSequence word) { 205 synchronized (mUpdatingLock) { 206 // If we need to update, start off a background task 207 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 208 if (mUpdatingDictionary) return false; 209 } 210 final int freq = getWordFrequency(word); 211 return freq > -1; 212 } 213 214 /** 215 * Returns the word's frequency or -1 if not found 216 */ 217 public int getWordFrequency(CharSequence word) { 218 Node node = searchNode(mRoots, word, 0, word.length()); 219 return (node == null) ? -1 : node.frequency; 220 } 221 222 /** 223 * Recursively traverse the tree for words that match the input. Input consists of 224 * a list of arrays. Each item in the list is one input character position. An input 225 * character is actually an array of multiple possible candidates. This function is not 226 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 227 * size. 228 * @param roots node whose children have to be search for matches 229 * @param codes the input character codes 230 * @param word the word being composed as a possible match 231 * @param depth the depth of traversal - the length of the word being composed thus far 232 * @param completion whether the traversal is now in completion mode - meaning that we've 233 * exhausted the input and we're looking for all possible suffixes. 234 * @param snr current weight of the word being formed 235 * @param inputIndex position in the input characters. This can be off from the depth in 236 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 237 * "wouldve", it could be matching "would've", so the depth will be one more than the 238 * inputIndex 239 * @param callback the callback class for adding a word 240 */ 241 protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 242 final int depth, boolean completion, int snr, int inputIndex, int skipPos, 243 WordCallback callback) { 244 final int count = roots.length; 245 final int codeSize = mInputLength; 246 // Optimization: Prune out words that are too long compared to how much was typed. 247 if (depth > mMaxDepth) { 248 return; 249 } 250 int[] currentChars = null; 251 if (codeSize <= inputIndex) { 252 completion = true; 253 } else { 254 currentChars = mCodes[inputIndex]; 255 } 256 257 for (int i = 0; i < count; i++) { 258 final Node node = roots.data[i]; 259 final char c = node.code; 260 final char lowerC = toLowerCase(c); 261 final boolean terminal = node.terminal; 262 final NodeArray children = node.children; 263 final int freq = node.frequency; 264 if (completion) { 265 word[depth] = c; 266 if (terminal) { 267 if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId, 268 DataType.UNIGRAM)) { 269 return; 270 } 271 // Add to frequency of next letters for predictive correction 272 if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0 273 && mNextLettersFrequencies.length > word[inputIndex]) { 274 mNextLettersFrequencies[word[inputIndex]]++; 275 } 276 } 277 if (children != null) { 278 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 279 skipPos, callback); 280 } 281 } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) { 282 // Skip the ' and continue deeper 283 word[depth] = c; 284 if (children != null) { 285 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 286 skipPos, callback); 287 } 288 } else { 289 // Don't use alternatives if we're looking for missing characters 290 final int alternativesSize = skipPos >= 0? 1 : currentChars.length; 291 for (int j = 0; j < alternativesSize; j++) { 292 final int addedAttenuation = (j > 0 ? 1 : 2); 293 final int currentChar = currentChars[j]; 294 if (currentChar == -1) { 295 break; 296 } 297 if (currentChar == lowerC || currentChar == c) { 298 word[depth] = c; 299 300 if (codeSize == inputIndex + 1) { 301 if (terminal) { 302 if (INCLUDE_TYPED_WORD_IF_VALID 303 || !same(word, depth + 1, codes.getTypedWord())) { 304 int finalFreq = freq * snr * addedAttenuation; 305 if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; 306 callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, 307 DataType.UNIGRAM); 308 } 309 } 310 if (children != null) { 311 getWordsRec(children, codes, word, depth + 1, 312 true, snr * addedAttenuation, inputIndex + 1, 313 skipPos, callback); 314 } 315 } else if (children != null) { 316 getWordsRec(children, codes, word, depth + 1, 317 false, snr * addedAttenuation, inputIndex + 1, 318 skipPos, callback); 319 } 320 } 321 } 322 } 323 } 324 } 325 326 protected int setBigram(String word1, String word2, int frequency) { 327 return addOrSetBigram(word1, word2, frequency, false); 328 } 329 330 protected int addBigram(String word1, String word2, int frequency) { 331 return addOrSetBigram(word1, word2, frequency, true); 332 } 333 334 /** 335 * Adds bigrams to the in-memory trie structure that is being used to retrieve any word 336 * @param frequency frequency for this bigrams 337 * @param addFrequency if true, it adds to current frequency 338 * @return returns the final frequency 339 */ 340 private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) { 341 Node firstWord = searchWord(mRoots, word1, 0, null); 342 Node secondWord = searchWord(mRoots, word2, 0, null); 343 LinkedList<NextWord> bigram = firstWord.ngrams; 344 if (bigram == null || bigram.size() == 0) { 345 firstWord.ngrams = new LinkedList<NextWord>(); 346 bigram = firstWord.ngrams; 347 } else { 348 for (NextWord nw : bigram) { 349 if (nw.word == secondWord) { 350 if (addFrequency) { 351 nw.frequency += frequency; 352 } else { 353 nw.frequency = frequency; 354 } 355 return nw.frequency; 356 } 357 } 358 } 359 NextWord nw = new NextWord(secondWord, frequency); 360 firstWord.ngrams.add(nw); 361 return frequency; 362 } 363 364 /** 365 * Searches for the word and add the word if it does not exist. 366 * @return Returns the terminal node of the word we are searching for. 367 */ 368 private Node searchWord(NodeArray children, String word, int depth, Node parentNode) { 369 final int wordLength = word.length(); 370 final char c = word.charAt(depth); 371 // Does children have the current character? 372 final int childrenLength = children.length; 373 Node childNode = null; 374 boolean found = false; 375 for (int i = 0; i < childrenLength; i++) { 376 childNode = children.data[i]; 377 if (childNode.code == c) { 378 found = true; 379 break; 380 } 381 } 382 if (!found) { 383 childNode = new Node(); 384 childNode.code = c; 385 childNode.parent = parentNode; 386 children.add(childNode); 387 } 388 if (wordLength == depth + 1) { 389 // Terminate this word 390 childNode.terminal = true; 391 return childNode; 392 } 393 if (childNode.children == null) { 394 childNode.children = new NodeArray(); 395 } 396 return searchWord(childNode.children, word, depth + 1, childNode); 397 } 398 399 // @VisibleForTesting 400 boolean reloadDictionaryIfRequired() { 401 synchronized (mUpdatingLock) { 402 // If we need to update, start off a background task 403 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 404 // Currently updating contacts, don't return any results. 405 return mUpdatingDictionary; 406 } 407 } 408 409 private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) { 410 Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length()); 411 if (prevWord != null && prevWord.ngrams != null) { 412 reverseLookUp(prevWord.ngrams, callback); 413 } 414 } 415 416 @Override 417 public void getBigrams(final WordComposer codes, final CharSequence previousWord, 418 final WordCallback callback, int[] nextLettersFrequencies) { 419 if (!reloadDictionaryIfRequired()) { 420 runReverseLookUp(previousWord, callback); 421 } 422 } 423 424 /** 425 * Used only for testing purposes 426 * This function will wait for loading from database to be done 427 */ 428 void waitForDictionaryLoading() { 429 while (mUpdatingDictionary) { 430 try { 431 Thread.sleep(100); 432 } catch (InterruptedException e) { 433 } 434 } 435 } 436 437 /** 438 * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words 439 * through callback. 440 * @param terminalNodes list of terminal nodes we want to add 441 */ 442 private void reverseLookUp(LinkedList<NextWord> terminalNodes, 443 final WordCallback callback) { 444 Node node; 445 int freq; 446 for (NextWord nextWord : terminalNodes) { 447 node = nextWord.word; 448 freq = nextWord.frequency; 449 // TODO Not the best way to limit suggestion threshold 450 if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) { 451 sb.setLength(0); 452 do { 453 sb.insert(0, node.code); 454 node = node.parent; 455 } while(node != null); 456 457 // TODO better way to feed char array? 458 callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId, 459 DataType.BIGRAM); 460 } 461 } 462 } 463 464 /** 465 * Search for the terminal node of the word 466 * @return Returns the terminal node of the word if the word exists 467 */ 468 private Node searchNode(final NodeArray children, final CharSequence word, final int offset, 469 final int length) { 470 // TODO Consider combining with addWordRec 471 final int count = children.length; 472 char currentChar = word.charAt(offset); 473 for (int j = 0; j < count; j++) { 474 final Node node = children.data[j]; 475 if (node.code == currentChar) { 476 if (offset == length - 1) { 477 if (node.terminal) { 478 return node; 479 } 480 } else { 481 if (node.children != null) { 482 Node returnNode = searchNode(node.children, word, offset + 1, length); 483 if (returnNode != null) return returnNode; 484 } 485 } 486 } 487 } 488 return null; 489 } 490 491 protected void clearDictionary() { 492 mRoots = new NodeArray(); 493 } 494 495 private class LoadDictionaryTask extends AsyncTask<Void, Void, Void> { 496 @Override 497 protected Void doInBackground(Void... v) { 498 loadDictionaryAsync(); 499 synchronized (mUpdatingLock) { 500 mUpdatingDictionary = false; 501 } 502 return null; 503 } 504 } 505 506 static char toLowerCase(char c) { 507 if (c < BASE_CHARS.length) { 508 c = BASE_CHARS[c]; 509 } 510 if (c >= 'A' && c <= 'Z') { 511 c = (char) (c | 32); 512 } else if (c > 127) { 513 c = Character.toLowerCase(c); 514 } 515 return c; 516 } 517 518 /** 519 * Table mapping most combined Latin, Greek, and Cyrillic characters 520 * to their base characters. If c is in range, BASE_CHARS[c] == c 521 * if c is not a combined character, or the base character if it 522 * is combined. 523 */ 524 static final char BASE_CHARS[] = { 525 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 526 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 527 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 528 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 529 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 530 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 531 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 532 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 533 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 534 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 535 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 536 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 537 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 538 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 539 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 540 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 541 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 542 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 543 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 544 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 545 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 546 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 547 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 548 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 549 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 550 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 551 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 552 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 553 // Manually changed df to 73 554 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 555 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 556 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 557 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 558 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 559 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 560 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 561 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 562 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 563 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 564 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 565 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 566 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 567 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 568 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 569 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 570 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 571 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 572 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 573 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 574 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 575 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 576 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 577 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 578 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 579 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 580 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 581 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 582 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 583 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 584 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 585 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 586 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 587 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 588 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 589 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 590 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 591 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 592 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 593 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 594 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 595 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 596 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 597 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 598 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 599 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 600 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 601 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 602 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 603 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 604 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 605 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 606 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 607 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 608 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 609 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 610 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 611 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 612 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 613 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 614 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 615 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 616 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 617 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 618 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 619 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 620 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 621 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 622 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 623 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 624 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 625 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 626 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 627 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 628 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 629 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 630 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 631 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 632 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 633 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 634 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 635 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 636 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 637 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 638 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 639 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 640 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 641 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 642 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 643 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 644 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 645 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 646 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 647 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 648 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 649 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 650 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 651 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 652 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 653 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 654 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 655 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 656 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 657 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 658 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 659 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 660 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 661 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 662 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 663 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 664 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 665 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 666 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 667 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 668 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 669 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 670 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 671 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 672 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 673 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 674 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 675 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 676 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 677 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 678 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 679 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 680 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 681 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 682 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 683 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 684 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 685 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 686 }; 687 688 // generated with: 689 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 690 691 } 692