1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 21 import com.android.inputmethod.keyboard.Keyboard; 22 import com.android.inputmethod.keyboard.ProximityInfo; 23 24 import java.util.LinkedList; 25 26 /** 27 * Base class for an in-memory dictionary that can grow dynamically and can 28 * be searched for suggestions and valid words. 29 */ 30 public class ExpandableDictionary extends Dictionary { 31 /** 32 * There is difference between what java and native code can handle. 33 * It uses 32 because Java stack overflows when greater value is used. 34 */ 35 protected static final int MAX_WORD_LENGTH = 32; 36 37 // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8. 38 protected static final int BIGRAM_MAX_FREQUENCY = 255; 39 40 private Context mContext; 41 private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; 42 private int mDicTypeId; 43 private int mMaxDepth; 44 private int mInputLength; 45 46 private boolean mRequiresReload; 47 48 private boolean mUpdatingDictionary; 49 50 // Use this lock before touching mUpdatingDictionary & mRequiresDownload 51 private Object mUpdatingLock = new Object(); 52 53 private static class Node { 54 char mCode; 55 int mFrequency; 56 boolean mTerminal; 57 Node mParent; 58 NodeArray mChildren; 59 LinkedList<NextWord> mNGrams; // Supports ngram 60 } 61 62 private static class NodeArray { 63 Node[] mData; 64 int mLength = 0; 65 private static final int INCREMENT = 2; 66 67 NodeArray() { 68 mData = new Node[INCREMENT]; 69 } 70 71 void add(Node n) { 72 if (mLength + 1 > mData.length) { 73 Node[] tempData = new Node[mLength + INCREMENT]; 74 if (mLength > 0) { 75 System.arraycopy(mData, 0, tempData, 0, mLength); 76 } 77 mData = tempData; 78 } 79 mData[mLength++] = n; 80 } 81 } 82 83 private static class NextWord { 84 public final Node mWord; 85 private int mFrequency; 86 87 public NextWord(Node word, int frequency) { 88 mWord = word; 89 mFrequency = frequency; 90 } 91 92 public int getFrequency() { 93 return mFrequency; 94 } 95 96 public int setFrequency(int freq) { 97 mFrequency = freq; 98 return mFrequency; 99 } 100 101 public int addFrequency(int add) { 102 mFrequency += add; 103 if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY; 104 return mFrequency; 105 } 106 } 107 108 private NodeArray mRoots; 109 110 private int[][] mCodes; 111 112 public ExpandableDictionary(Context context, int dicTypeId) { 113 mContext = context; 114 clearDictionary(); 115 mCodes = new int[MAX_WORD_LENGTH][]; 116 mDicTypeId = dicTypeId; 117 } 118 119 public void loadDictionary() { 120 synchronized (mUpdatingLock) { 121 startDictionaryLoadingTaskLocked(); 122 } 123 } 124 125 public void startDictionaryLoadingTaskLocked() { 126 if (!mUpdatingDictionary) { 127 mUpdatingDictionary = true; 128 mRequiresReload = false; 129 new LoadDictionaryTask().start(); 130 } 131 } 132 133 public void setRequiresReload(boolean reload) { 134 synchronized (mUpdatingLock) { 135 mRequiresReload = reload; 136 } 137 } 138 139 public boolean getRequiresReload() { 140 return mRequiresReload; 141 } 142 143 /** Override to load your dictionary here, on a background thread. */ 144 public void loadDictionaryAsync() { 145 // empty base implementation 146 } 147 148 public Context getContext() { 149 return mContext; 150 } 151 152 public int getMaxWordLength() { 153 return MAX_WORD_LENGTH; 154 } 155 156 public void addWord(String word, int frequency) { 157 addWordRec(mRoots, word, 0, frequency, null); 158 } 159 160 private void addWordRec(NodeArray children, final String word, final int depth, 161 final int frequency, Node parentNode) { 162 final int wordLength = word.length(); 163 if (wordLength <= depth) return; 164 final char c = word.charAt(depth); 165 // Does children have the current character? 166 final int childrenLength = children.mLength; 167 Node childNode = null; 168 for (int i = 0; i < childrenLength; i++) { 169 final Node node = children.mData[i]; 170 if (node.mCode == c) { 171 childNode = node; 172 break; 173 } 174 } 175 if (childNode == null) { 176 childNode = new Node(); 177 childNode.mCode = c; 178 childNode.mParent = parentNode; 179 children.add(childNode); 180 } 181 if (wordLength == depth + 1) { 182 // Terminate this word 183 childNode.mTerminal = true; 184 childNode.mFrequency = Math.max(frequency, childNode.mFrequency); 185 if (childNode.mFrequency > 255) childNode.mFrequency = 255; 186 return; 187 } 188 if (childNode.mChildren == null) { 189 childNode.mChildren = new NodeArray(); 190 } 191 addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode); 192 } 193 194 @Override 195 public void getWords(final WordComposer codes, final WordCallback callback, 196 final ProximityInfo proximityInfo) { 197 synchronized (mUpdatingLock) { 198 // If we need to update, start off a background task 199 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 200 // Currently updating contacts, don't return any results. 201 if (mUpdatingDictionary) return; 202 } 203 getWordsInner(codes, callback, proximityInfo); 204 } 205 206 protected final void getWordsInner(final WordComposer codes, final WordCallback callback, 207 @SuppressWarnings("unused") final ProximityInfo proximityInfo) { 208 mInputLength = codes.size(); 209 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 210 // Cache the codes so that we don't have to lookup an array list 211 for (int i = 0; i < mInputLength; i++) { 212 mCodes[i] = codes.getCodesAt(i); 213 } 214 mMaxDepth = mInputLength * 3; 215 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback); 216 for (int i = 0; i < mInputLength; i++) { 217 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback); 218 } 219 } 220 221 @Override 222 public synchronized boolean isValidWord(CharSequence word) { 223 synchronized (mUpdatingLock) { 224 // If we need to update, start off a background task 225 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 226 if (mUpdatingDictionary) return false; 227 } 228 return getWordFrequency(word) > -1; 229 } 230 231 /** 232 * Returns the word's frequency or -1 if not found 233 */ 234 protected int getWordFrequency(CharSequence word) { 235 // Case-sensitive search 236 Node node = searchNode(mRoots, word, 0, word.length()); 237 return (node == null) ? -1 : node.mFrequency; 238 } 239 240 private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) { 241 // The computation itself makes sense for >= 2, but the == 2 case returns 0 242 // anyway so we may as well test against 3 instead and return the constant 243 if (inputLength >= 3) { 244 return (freq * snr * (inputLength - 2)) / (inputLength - 1); 245 } else { 246 return 0; 247 } 248 } 249 250 /** 251 * Recursively traverse the tree for words that match the input. Input consists of 252 * a list of arrays. Each item in the list is one input character position. An input 253 * character is actually an array of multiple possible candidates. This function is not 254 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 255 * size. 256 * @param roots node whose children have to be search for matches 257 * @param codes the input character codes 258 * @param word the word being composed as a possible match 259 * @param depth the depth of traversal - the length of the word being composed thus far 260 * @param completion whether the traversal is now in completion mode - meaning that we've 261 * exhausted the input and we're looking for all possible suffixes. 262 * @param snr current weight of the word being formed 263 * @param inputIndex position in the input characters. This can be off from the depth in 264 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 265 * "wouldve", it could be matching "would've", so the depth will be one more than the 266 * inputIndex 267 * @param callback the callback class for adding a word 268 */ 269 // TODO: Share this routine with the native code for BinaryDictionary 270 protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 271 final int depth, final boolean completion, int snr, int inputIndex, int skipPos, 272 WordCallback callback) { 273 final int count = roots.mLength; 274 final int codeSize = mInputLength; 275 // Optimization: Prune out words that are too long compared to how much was typed. 276 if (depth > mMaxDepth) { 277 return; 278 } 279 final int[] currentChars; 280 if (codeSize <= inputIndex) { 281 currentChars = null; 282 } else { 283 currentChars = mCodes[inputIndex]; 284 } 285 286 for (int i = 0; i < count; i++) { 287 final Node node = roots.mData[i]; 288 final char c = node.mCode; 289 final char lowerC = toLowerCase(c); 290 final boolean terminal = node.mTerminal; 291 final NodeArray children = node.mChildren; 292 final int freq = node.mFrequency; 293 if (completion || currentChars == null) { 294 word[depth] = c; 295 if (terminal) { 296 final int finalFreq; 297 if (skipPos < 0) { 298 finalFreq = freq * snr; 299 } else { 300 finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); 301 } 302 if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, 303 DataType.UNIGRAM)) { 304 return; 305 } 306 } 307 if (children != null) { 308 getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex, 309 skipPos, callback); 310 } 311 } else if ((c == Keyboard.CODE_SINGLE_QUOTE 312 && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) { 313 // Skip the ' and continue deeper 314 word[depth] = c; 315 if (children != null) { 316 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 317 skipPos, callback); 318 } 319 } else { 320 // Don't use alternatives if we're looking for missing characters 321 final int alternativesSize = skipPos >= 0? 1 : currentChars.length; 322 for (int j = 0; j < alternativesSize; j++) { 323 final int addedAttenuation = (j > 0 ? 1 : 2); 324 final int currentChar = currentChars[j]; 325 if (currentChar == -1) { 326 break; 327 } 328 if (currentChar == lowerC || currentChar == c) { 329 word[depth] = c; 330 331 if (codeSize == inputIndex + 1) { 332 if (terminal) { 333 if (INCLUDE_TYPED_WORD_IF_VALID 334 || !same(word, depth + 1, codes.getTypedWord())) { 335 final int finalFreq; 336 if (skipPos < 0) { 337 finalFreq = freq * snr * addedAttenuation 338 * FULL_WORD_SCORE_MULTIPLIER; 339 } else { 340 finalFreq = computeSkippedWordFinalFreq(freq, 341 snr * addedAttenuation, mInputLength); 342 } 343 callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, 344 DataType.UNIGRAM); 345 } 346 } 347 if (children != null) { 348 getWordsRec(children, codes, word, depth + 1, 349 true, snr * addedAttenuation, inputIndex + 1, 350 skipPos, callback); 351 } 352 } else if (children != null) { 353 getWordsRec(children, codes, word, depth + 1, 354 false, snr * addedAttenuation, inputIndex + 1, 355 skipPos, callback); 356 } 357 } 358 } 359 } 360 } 361 } 362 363 protected int setBigram(String word1, String word2, int frequency) { 364 return addOrSetBigram(word1, word2, frequency, false); 365 } 366 367 protected int addBigram(String word1, String word2, int frequency) { 368 return addOrSetBigram(word1, word2, frequency, true); 369 } 370 371 /** 372 * Adds bigrams to the in-memory trie structure that is being used to retrieve any word 373 * @param frequency frequency for this bigram 374 * @param addFrequency if true, it adds to current frequency, else it overwrites the old value 375 * @return returns the final frequency 376 */ 377 private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) { 378 // We don't want results to be different according to case of the looked up left hand side 379 // word. We do want however to return the correct case for the right hand side. 380 // So we want to squash the case of the left hand side, and preserve that of the right 381 // hand side word. 382 Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 383 Node secondWord = searchWord(mRoots, word2, 0, null); 384 LinkedList<NextWord> bigram = firstWord.mNGrams; 385 if (bigram == null || bigram.size() == 0) { 386 firstWord.mNGrams = new LinkedList<NextWord>(); 387 bigram = firstWord.mNGrams; 388 } else { 389 for (NextWord nw : bigram) { 390 if (nw.mWord == secondWord) { 391 if (addFrequency) { 392 return nw.addFrequency(frequency); 393 } else { 394 return nw.setFrequency(frequency); 395 } 396 } 397 } 398 } 399 firstWord.mNGrams.add(new NextWord(secondWord, frequency)); 400 return frequency; 401 } 402 403 /** 404 * Searches for the word and add the word if it does not exist. 405 * @return Returns the terminal node of the word we are searching for. 406 */ 407 private Node searchWord(NodeArray children, String word, int depth, Node parentNode) { 408 final int wordLength = word.length(); 409 final char c = word.charAt(depth); 410 // Does children have the current character? 411 final int childrenLength = children.mLength; 412 Node childNode = null; 413 for (int i = 0; i < childrenLength; i++) { 414 final Node node = children.mData[i]; 415 if (node.mCode == c) { 416 childNode = node; 417 break; 418 } 419 } 420 if (childNode == null) { 421 childNode = new Node(); 422 childNode.mCode = c; 423 childNode.mParent = parentNode; 424 children.add(childNode); 425 } 426 if (wordLength == depth + 1) { 427 // Terminate this word 428 childNode.mTerminal = true; 429 return childNode; 430 } 431 if (childNode.mChildren == null) { 432 childNode.mChildren = new NodeArray(); 433 } 434 return searchWord(childNode.mChildren, word, depth + 1, childNode); 435 } 436 437 // @VisibleForTesting 438 boolean reloadDictionaryIfRequired() { 439 synchronized (mUpdatingLock) { 440 // If we need to update, start off a background task 441 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 442 // Currently updating contacts, don't return any results. 443 return mUpdatingDictionary; 444 } 445 } 446 447 private void runBigramReverseLookUp(final CharSequence previousWord, 448 final WordCallback callback) { 449 // Search for the lowercase version of the word only, because that's where bigrams 450 // store their sons. 451 Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0, 452 previousWord.length()); 453 if (prevWord != null && prevWord.mNGrams != null) { 454 reverseLookUp(prevWord.mNGrams, callback); 455 } 456 } 457 458 @Override 459 public void getBigrams(final WordComposer codes, final CharSequence previousWord, 460 final WordCallback callback) { 461 if (!reloadDictionaryIfRequired()) { 462 runBigramReverseLookUp(previousWord, callback); 463 } 464 } 465 466 /** 467 * Used for testing purposes and in the spell checker 468 * This function will wait for loading from database to be done 469 */ 470 void waitForDictionaryLoading() { 471 while (mUpdatingDictionary) { 472 try { 473 Thread.sleep(100); 474 } catch (InterruptedException e) { 475 // 476 } 477 } 478 } 479 480 protected final void blockingReloadDictionaryIfRequired() { 481 reloadDictionaryIfRequired(); 482 waitForDictionaryLoading(); 483 } 484 485 // Local to reverseLookUp, but do not allocate each time. 486 private final char[] mLookedUpString = new char[MAX_WORD_LENGTH]; 487 488 /** 489 * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words 490 * through callback. 491 * @param terminalNodes list of terminal nodes we want to add 492 */ 493 private void reverseLookUp(LinkedList<NextWord> terminalNodes, 494 final WordCallback callback) { 495 Node node; 496 int freq; 497 for (NextWord nextWord : terminalNodes) { 498 node = nextWord.mWord; 499 freq = nextWord.getFrequency(); 500 int index = MAX_WORD_LENGTH; 501 do { 502 --index; 503 mLookedUpString[index] = node.mCode; 504 node = node.mParent; 505 } while (node != null); 506 507 callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId, 508 DataType.BIGRAM); 509 } 510 } 511 512 /** 513 * Recursively search for the terminal node of the word. 514 * 515 * One iteration takes the full word to search for and the current index of the recursion. 516 * 517 * @param children the node of the trie to search under. 518 * @param word the word to search for. Only read [offset..length] so there may be trailing chars 519 * @param offset the index in {@code word} this recursion should operate on. 520 * @param length the length of the input word. 521 * @return Returns the terminal node of the word if the word exists 522 */ 523 private Node searchNode(final NodeArray children, final CharSequence word, final int offset, 524 final int length) { 525 final int count = children.mLength; 526 final char currentChar = word.charAt(offset); 527 for (int j = 0; j < count; j++) { 528 final Node node = children.mData[j]; 529 if (node.mCode == currentChar) { 530 if (offset == length - 1) { 531 if (node.mTerminal) { 532 return node; 533 } 534 } else { 535 if (node.mChildren != null) { 536 Node returnNode = searchNode(node.mChildren, word, offset + 1, length); 537 if (returnNode != null) return returnNode; 538 } 539 } 540 } 541 } 542 return null; 543 } 544 545 protected void clearDictionary() { 546 mRoots = new NodeArray(); 547 } 548 549 private class LoadDictionaryTask extends Thread { 550 @Override 551 public void run() { 552 loadDictionaryAsync(); 553 synchronized (mUpdatingLock) { 554 mUpdatingDictionary = false; 555 } 556 } 557 } 558 559 private static char toLowerCase(char c) { 560 char baseChar = c; 561 if (c < BASE_CHARS.length) { 562 baseChar = BASE_CHARS[c]; 563 } 564 if (baseChar >= 'A' && baseChar <= 'Z') { 565 return (char)(baseChar | 32); 566 } else if (baseChar > 127) { 567 return Character.toLowerCase(baseChar); 568 } 569 return baseChar; 570 } 571 572 /** 573 * Table mapping most combined Latin, Greek, and Cyrillic characters 574 * to their base characters. If c is in range, BASE_CHARS[c] == c 575 * if c is not a combined character, or the base character if it 576 * is combined. 577 */ 578 private static final char BASE_CHARS[] = { 579 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 580 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 581 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 582 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 583 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 584 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 585 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 586 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 587 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 588 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 589 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 590 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 591 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 592 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 593 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 594 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 595 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 596 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 597 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 598 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 599 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 600 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 601 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 602 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 603 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 604 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 605 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 606 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 607 // Manually changed df to 73 608 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 609 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 610 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 611 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 612 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 613 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 614 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 615 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 616 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 617 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 618 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 619 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 620 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 621 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 622 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 623 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 624 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 625 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 626 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 627 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 628 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 629 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 630 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 631 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 632 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 633 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 634 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 635 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 636 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 637 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 638 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 639 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 640 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 641 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 642 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 643 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 644 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 645 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 646 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 647 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 648 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 649 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 650 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 651 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 652 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 653 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 654 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 655 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 656 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 657 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 658 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 659 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 660 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 661 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 662 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 663 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 664 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 665 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 666 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 667 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 668 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 669 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 670 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 671 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 672 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 673 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 674 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 675 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 676 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 677 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 678 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 679 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 680 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 681 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 682 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 683 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 684 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 685 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 686 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 687 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 688 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 689 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 690 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 691 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 692 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 693 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 694 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 695 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 696 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 697 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 698 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 699 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 700 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 701 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 702 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 703 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 704 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 705 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 706 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 707 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 708 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 709 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 710 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 711 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 712 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 713 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 714 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 715 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 716 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 717 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 718 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 719 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 720 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 721 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 722 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 723 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 724 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 725 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 726 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 727 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 728 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 729 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 730 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 731 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 732 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 733 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 734 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 735 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 736 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 737 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 738 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 739 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 740 }; 741 742 // generated with: 743 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 744 745 } 746