1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 import android.text.TextUtils; 21 import android.util.Log; 22 23 import com.android.inputmethod.keyboard.ProximityInfo; 24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 25 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; 26 27 import java.util.ArrayList; 28 import java.util.LinkedList; 29 30 /** 31 * Base class for an in-memory dictionary that can grow dynamically and can 32 * be searched for suggestions and valid words. 33 */ 34 public class ExpandableDictionary extends Dictionary { 35 private static final String TAG = ExpandableDictionary.class.getSimpleName(); 36 /** 37 * The weight to give to a word if it's length is the same as the number of typed characters. 38 */ 39 private static final int FULL_WORD_SCORE_MULTIPLIER = 2; 40 41 // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8. 42 protected static final int BIGRAM_MAX_FREQUENCY = 255; 43 44 private Context mContext; 45 private char[] mWordBuilder = new char[Constants.Dictionary.MAX_WORD_LENGTH]; 46 private int mMaxDepth; 47 private int mInputLength; 48 49 private boolean mRequiresReload; 50 51 private boolean mUpdatingDictionary; 52 53 // Use this lock before touching mUpdatingDictionary & mRequiresDownload 54 private Object mUpdatingLock = new Object(); 55 56 private static final class Node { 57 Node() {} 58 char mCode; 59 int mFrequency; 60 boolean mTerminal; 61 Node mParent; 62 NodeArray mChildren; 63 ArrayList<char[]> mShortcutTargets; 64 boolean mShortcutOnly; 65 LinkedList<NextWord> mNGrams; // Supports ngram 66 } 67 68 private static final class NodeArray { 69 Node[] mData; 70 int mLength = 0; 71 private static final int INCREMENT = 2; 72 73 NodeArray() { 74 mData = new Node[INCREMENT]; 75 } 76 77 void add(final Node n) { 78 if (mLength + 1 > mData.length) { 79 Node[] tempData = new Node[mLength + INCREMENT]; 80 if (mLength > 0) { 81 System.arraycopy(mData, 0, tempData, 0, mLength); 82 } 83 mData = tempData; 84 } 85 mData[mLength++] = n; 86 } 87 } 88 89 protected interface NextWord { 90 public Node getWordNode(); 91 public int getFrequency(); 92 public ForgettingCurveParams getFcParams(); 93 public int notifyTypedAgainAndGetFrequency(); 94 } 95 96 private static final class NextStaticWord implements NextWord { 97 public final Node mWord; 98 private final int mFrequency; 99 public NextStaticWord(Node word, int frequency) { 100 mWord = word; 101 mFrequency = frequency; 102 } 103 104 @Override 105 public Node getWordNode() { 106 return mWord; 107 } 108 109 @Override 110 public int getFrequency() { 111 return mFrequency; 112 } 113 114 @Override 115 public ForgettingCurveParams getFcParams() { 116 return null; 117 } 118 119 @Override 120 public int notifyTypedAgainAndGetFrequency() { 121 return mFrequency; 122 } 123 } 124 125 private static final class NextHistoryWord implements NextWord { 126 public final Node mWord; 127 public final ForgettingCurveParams mFcp; 128 129 public NextHistoryWord(Node word, ForgettingCurveParams fcp) { 130 mWord = word; 131 mFcp = fcp; 132 } 133 134 @Override 135 public Node getWordNode() { 136 return mWord; 137 } 138 139 @Override 140 public int getFrequency() { 141 return mFcp.getFrequency(); 142 } 143 144 @Override 145 public ForgettingCurveParams getFcParams() { 146 return mFcp; 147 } 148 149 @Override 150 public int notifyTypedAgainAndGetFrequency() { 151 return mFcp.notifyTypedAgainAndGetFrequency(); 152 } 153 } 154 155 private NodeArray mRoots; 156 157 private int[][] mCodes; 158 159 public ExpandableDictionary(final Context context, final String dictType) { 160 super(dictType); 161 mContext = context; 162 clearDictionary(); 163 mCodes = new int[Constants.Dictionary.MAX_WORD_LENGTH][]; 164 } 165 166 public void loadDictionary() { 167 synchronized (mUpdatingLock) { 168 startDictionaryLoadingTaskLocked(); 169 } 170 } 171 172 public void startDictionaryLoadingTaskLocked() { 173 if (!mUpdatingDictionary) { 174 mUpdatingDictionary = true; 175 mRequiresReload = false; 176 new LoadDictionaryTask().start(); 177 } 178 } 179 180 public void setRequiresReload(final boolean reload) { 181 synchronized (mUpdatingLock) { 182 mRequiresReload = reload; 183 } 184 } 185 186 public boolean getRequiresReload() { 187 return mRequiresReload; 188 } 189 190 /** Override to load your dictionary here, on a background thread. */ 191 public void loadDictionaryAsync() { 192 // empty base implementation 193 } 194 195 public Context getContext() { 196 return mContext; 197 } 198 199 public int getMaxWordLength() { 200 return Constants.Dictionary.MAX_WORD_LENGTH; 201 } 202 203 public void addWord(final String word, final String shortcutTarget, final int frequency) { 204 if (word.length() >= Constants.Dictionary.MAX_WORD_LENGTH) { 205 return; 206 } 207 addWordRec(mRoots, word, 0, shortcutTarget, frequency, null); 208 } 209 210 private void addWordRec(final NodeArray children, final String word, final int depth, 211 final String shortcutTarget, final int frequency, final Node parentNode) { 212 final int wordLength = word.length(); 213 if (wordLength <= depth) return; 214 final char c = word.charAt(depth); 215 // Does children have the current character? 216 final int childrenLength = children.mLength; 217 Node childNode = null; 218 for (int i = 0; i < childrenLength; i++) { 219 final Node node = children.mData[i]; 220 if (node.mCode == c) { 221 childNode = node; 222 break; 223 } 224 } 225 final boolean isShortcutOnly = (null != shortcutTarget); 226 if (childNode == null) { 227 childNode = new Node(); 228 childNode.mCode = c; 229 childNode.mParent = parentNode; 230 childNode.mShortcutOnly = isShortcutOnly; 231 children.add(childNode); 232 } 233 if (wordLength == depth + 1 && shortcutTarget != null) { 234 // Terminate this word 235 childNode.mTerminal = true; 236 if (isShortcutOnly) { 237 if (null == childNode.mShortcutTargets) { 238 childNode.mShortcutTargets = CollectionUtils.newArrayList(); 239 } 240 childNode.mShortcutTargets.add(shortcutTarget.toCharArray()); 241 } else { 242 childNode.mShortcutOnly = false; 243 } 244 childNode.mFrequency = Math.max(frequency, childNode.mFrequency); 245 if (childNode.mFrequency > 255) childNode.mFrequency = 255; 246 return; 247 } 248 if (childNode.mChildren == null) { 249 childNode.mChildren = new NodeArray(); 250 } 251 addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode); 252 } 253 254 @Override 255 public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, 256 final String prevWord, final ProximityInfo proximityInfo, 257 final boolean blockOffensiveWords) { 258 if (reloadDictionaryIfRequired()) return null; 259 if (composer.size() > 1) { 260 if (composer.size() >= Constants.Dictionary.MAX_WORD_LENGTH) { 261 return null; 262 } 263 final ArrayList<SuggestedWordInfo> suggestions = 264 getWordsInner(composer, prevWord, proximityInfo); 265 return suggestions; 266 } else { 267 if (TextUtils.isEmpty(prevWord)) return null; 268 final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); 269 runBigramReverseLookUp(prevWord, suggestions); 270 return suggestions; 271 } 272 } 273 274 // This reloads the dictionary if required, and returns whether it's currently updating its 275 // contents or not. 276 private boolean reloadDictionaryIfRequired() { 277 synchronized (mUpdatingLock) { 278 // If we need to update, start off a background task 279 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 280 return mUpdatingDictionary; 281 } 282 } 283 284 protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer codes, 285 final String prevWordForBigrams, final ProximityInfo proximityInfo) { 286 final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); 287 mInputLength = codes.size(); 288 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 289 final InputPointers ips = codes.getInputPointers(); 290 final int[] xCoordinates = ips.getXCoordinates(); 291 final int[] yCoordinates = ips.getYCoordinates(); 292 // Cache the codes so that we don't have to lookup an array list 293 for (int i = 0; i < mInputLength; i++) { 294 // TODO: Calculate proximity info here. 295 if (mCodes[i] == null || mCodes[i].length < 1) { 296 mCodes[i] = new int[ProximityInfo.MAX_PROXIMITY_CHARS_SIZE]; 297 } 298 final int x = xCoordinates != null && i < xCoordinates.length ? 299 xCoordinates[i] : Constants.NOT_A_COORDINATE; 300 final int y = xCoordinates != null && i < yCoordinates.length ? 301 yCoordinates[i] : Constants.NOT_A_COORDINATE; 302 proximityInfo.fillArrayWithNearestKeyCodes(x, y, codes.getCodeAt(i), mCodes[i]); 303 } 304 mMaxDepth = mInputLength * 3; 305 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, suggestions); 306 for (int i = 0; i < mInputLength; i++) { 307 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, suggestions); 308 } 309 return suggestions; 310 } 311 312 @Override 313 public synchronized boolean isValidWord(final String word) { 314 synchronized (mUpdatingLock) { 315 // If we need to update, start off a background task 316 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 317 if (mUpdatingDictionary) return false; 318 } 319 final Node node = searchNode(mRoots, word, 0, word.length()); 320 // If node is null, we didn't find the word, so it's not valid. 321 // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word, 322 // so that means it's not a valid word. 323 // If node.mShortcutOnly is false, then it exists as a word (it may also exist as 324 // a shortcut, but this does not matter), so it's a valid word. 325 return (node == null) ? false : !node.mShortcutOnly; 326 } 327 328 protected boolean removeBigram(final String word1, final String word2) { 329 // Refer to addOrSetBigram() about word1.toLowerCase() 330 final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 331 final Node secondWord = searchWord(mRoots, word2, 0, null); 332 LinkedList<NextWord> bigrams = firstWord.mNGrams; 333 NextWord bigramNode = null; 334 if (bigrams == null || bigrams.size() == 0) { 335 return false; 336 } else { 337 for (NextWord nw : bigrams) { 338 if (nw.getWordNode() == secondWord) { 339 bigramNode = nw; 340 break; 341 } 342 } 343 } 344 if (bigramNode == null) { 345 return false; 346 } 347 return bigrams.remove(bigramNode); 348 } 349 350 /** 351 * Returns the word's frequency or -1 if not found 352 */ 353 protected int getWordFrequency(final String word) { 354 // Case-sensitive search 355 final Node node = searchNode(mRoots, word, 0, word.length()); 356 return (node == null) ? -1 : node.mFrequency; 357 } 358 359 protected NextWord getBigramWord(final String word1, final String word2) { 360 // Refer to addOrSetBigram() about word1.toLowerCase() 361 final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 362 final Node secondWord = searchWord(mRoots, word2, 0, null); 363 LinkedList<NextWord> bigrams = firstWord.mNGrams; 364 if (bigrams == null || bigrams.size() == 0) { 365 return null; 366 } else { 367 for (NextWord nw : bigrams) { 368 if (nw.getWordNode() == secondWord) { 369 return nw; 370 } 371 } 372 } 373 return null; 374 } 375 376 private static int computeSkippedWordFinalFreq(final int freq, final int snr, 377 final int inputLength) { 378 // The computation itself makes sense for >= 2, but the == 2 case returns 0 379 // anyway so we may as well test against 3 instead and return the constant 380 if (inputLength >= 3) { 381 return (freq * snr * (inputLength - 2)) / (inputLength - 1); 382 } else { 383 return 0; 384 } 385 } 386 387 /** 388 * Helper method to add a word and its shortcuts. 389 * 390 * @param node the terminal node 391 * @param word the word to insert, as an array of code points 392 * @param depth the depth of the node in the tree 393 * @param finalFreq the frequency for this word 394 * @param suggestions the suggestion collection to add the suggestions to 395 * @return whether there is still space for more words. 396 */ 397 private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth, 398 final int finalFreq, final ArrayList<SuggestedWordInfo> suggestions) { 399 if (finalFreq > 0 && !node.mShortcutOnly) { 400 // Use KIND_CORRECTION always. This dictionary does not really have a notion of 401 // COMPLETION against CORRECTION; we could artificially add one by looking at 402 // the respective size of the typed word and the suggestion if it matters sometime 403 // in the future. 404 suggestions.add(new SuggestedWordInfo(new String(word, 0, depth + 1), finalFreq, 405 SuggestedWordInfo.KIND_CORRECTION, mDictType)); 406 if (suggestions.size() >= Suggest.MAX_SUGGESTIONS) return false; 407 } 408 if (null != node.mShortcutTargets) { 409 final int length = node.mShortcutTargets.size(); 410 for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) { 411 final char[] shortcut = node.mShortcutTargets.get(shortcutIndex); 412 suggestions.add(new SuggestedWordInfo(new String(shortcut, 0, shortcut.length), 413 finalFreq, SuggestedWordInfo.KIND_SHORTCUT, mDictType)); 414 if (suggestions.size() > Suggest.MAX_SUGGESTIONS) return false; 415 } 416 } 417 return true; 418 } 419 420 /** 421 * Recursively traverse the tree for words that match the input. Input consists of 422 * a list of arrays. Each item in the list is one input character position. An input 423 * character is actually an array of multiple possible candidates. This function is not 424 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 425 * size. 426 * @param roots node whose children have to be search for matches 427 * @param codes the input character codes 428 * @param word the word being composed as a possible match 429 * @param depth the depth of traversal - the length of the word being composed thus far 430 * @param completion whether the traversal is now in completion mode - meaning that we've 431 * exhausted the input and we're looking for all possible suffixes. 432 * @param snr current weight of the word being formed 433 * @param inputIndex position in the input characters. This can be off from the depth in 434 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 435 * "wouldve", it could be matching "would've", so the depth will be one more than the 436 * inputIndex 437 * @param suggestions the list in which to add suggestions 438 */ 439 // TODO: Share this routine with the native code for BinaryDictionary 440 protected void getWordsRec(final NodeArray roots, final WordComposer codes, final char[] word, 441 final int depth, final boolean completion, final int snr, final int inputIndex, 442 final int skipPos, final ArrayList<SuggestedWordInfo> suggestions) { 443 final int count = roots.mLength; 444 final int codeSize = mInputLength; 445 // Optimization: Prune out words that are too long compared to how much was typed. 446 if (depth > mMaxDepth) { 447 return; 448 } 449 final int[] currentChars; 450 if (codeSize <= inputIndex) { 451 currentChars = null; 452 } else { 453 currentChars = mCodes[inputIndex]; 454 } 455 456 for (int i = 0; i < count; i++) { 457 final Node node = roots.mData[i]; 458 final char c = node.mCode; 459 final char lowerC = toLowerCase(c); 460 final boolean terminal = node.mTerminal; 461 final NodeArray children = node.mChildren; 462 final int freq = node.mFrequency; 463 if (completion || currentChars == null) { 464 word[depth] = c; 465 if (terminal) { 466 final int finalFreq; 467 if (skipPos < 0) { 468 finalFreq = freq * snr; 469 } else { 470 finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); 471 } 472 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, suggestions)) { 473 // No space left in the queue, bail out 474 return; 475 } 476 } 477 if (children != null) { 478 getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex, 479 skipPos, suggestions); 480 } 481 } else if ((c == Constants.CODE_SINGLE_QUOTE 482 && currentChars[0] != Constants.CODE_SINGLE_QUOTE) || depth == skipPos) { 483 // Skip the ' and continue deeper 484 word[depth] = c; 485 if (children != null) { 486 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 487 skipPos, suggestions); 488 } 489 } else { 490 // Don't use alternatives if we're looking for missing characters 491 final int alternativesSize = skipPos >= 0 ? 1 : currentChars.length; 492 for (int j = 0; j < alternativesSize; j++) { 493 final int addedAttenuation = (j > 0 ? 1 : 2); 494 final int currentChar = currentChars[j]; 495 if (currentChar == Constants.NOT_A_CODE) { 496 break; 497 } 498 if (currentChar == lowerC || currentChar == c) { 499 word[depth] = c; 500 501 if (codeSize == inputIndex + 1) { 502 if (terminal) { 503 final int finalFreq; 504 if (skipPos < 0) { 505 finalFreq = freq * snr * addedAttenuation 506 * FULL_WORD_SCORE_MULTIPLIER; 507 } else { 508 finalFreq = computeSkippedWordFinalFreq(freq, 509 snr * addedAttenuation, mInputLength); 510 } 511 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, 512 suggestions)) { 513 // No space left in the queue, bail out 514 return; 515 } 516 } 517 if (children != null) { 518 getWordsRec(children, codes, word, depth + 1, 519 true, snr * addedAttenuation, inputIndex + 1, 520 skipPos, suggestions); 521 } 522 } else if (children != null) { 523 getWordsRec(children, codes, word, depth + 1, 524 false, snr * addedAttenuation, inputIndex + 1, 525 skipPos, suggestions); 526 } 527 } 528 } 529 } 530 } 531 } 532 533 public int setBigramAndGetFrequency(final String word1, final String word2, 534 final int frequency) { 535 return setBigramAndGetFrequency(word1, word2, frequency, null /* unused */); 536 } 537 538 public int setBigramAndGetFrequency(final String word1, final String word2, 539 final ForgettingCurveParams fcp) { 540 return setBigramAndGetFrequency(word1, word2, 0 /* unused */, fcp); 541 } 542 543 /** 544 * Adds bigrams to the in-memory trie structure that is being used to retrieve any word 545 * @param word1 the first word of this bigram 546 * @param word2 the second word of this bigram 547 * @param frequency frequency for this bigram 548 * @param fcp an instance of ForgettingCurveParams to use for decay policy 549 * @return returns the final bigram frequency 550 */ 551 private int setBigramAndGetFrequency(final String word1, final String word2, 552 final int frequency, final ForgettingCurveParams fcp) { 553 // We don't want results to be different according to case of the looked up left hand side 554 // word. We do want however to return the correct case for the right hand side. 555 // So we want to squash the case of the left hand side, and preserve that of the right 556 // hand side word. 557 final String word1Lower = word1.toLowerCase(); 558 if (TextUtils.isEmpty(word1Lower) || TextUtils.isEmpty(word2)) { 559 Log.e(TAG, "Invalid bigram pair: " + word1 + ", " + word1Lower + ", " + word2); 560 return frequency; 561 } 562 final Node firstWord = searchWord(mRoots, word1Lower, 0, null); 563 final Node secondWord = searchWord(mRoots, word2, 0, null); 564 LinkedList<NextWord> bigrams = firstWord.mNGrams; 565 if (bigrams == null || bigrams.size() == 0) { 566 firstWord.mNGrams = CollectionUtils.newLinkedList(); 567 bigrams = firstWord.mNGrams; 568 } else { 569 for (NextWord nw : bigrams) { 570 if (nw.getWordNode() == secondWord) { 571 return nw.notifyTypedAgainAndGetFrequency(); 572 } 573 } 574 } 575 if (fcp != null) { 576 // history 577 firstWord.mNGrams.add(new NextHistoryWord(secondWord, fcp)); 578 } else { 579 firstWord.mNGrams.add(new NextStaticWord(secondWord, frequency)); 580 } 581 return frequency; 582 } 583 584 /** 585 * Searches for the word and add the word if it does not exist. 586 * @return Returns the terminal node of the word we are searching for. 587 */ 588 private Node searchWord(final NodeArray children, final String word, final int depth, 589 final Node parentNode) { 590 final int wordLength = word.length(); 591 final char c = word.charAt(depth); 592 // Does children have the current character? 593 final int childrenLength = children.mLength; 594 Node childNode = null; 595 for (int i = 0; i < childrenLength; i++) { 596 final Node node = children.mData[i]; 597 if (node.mCode == c) { 598 childNode = node; 599 break; 600 } 601 } 602 if (childNode == null) { 603 childNode = new Node(); 604 childNode.mCode = c; 605 childNode.mParent = parentNode; 606 children.add(childNode); 607 } 608 if (wordLength == depth + 1) { 609 // Terminate this word 610 childNode.mTerminal = true; 611 return childNode; 612 } 613 if (childNode.mChildren == null) { 614 childNode.mChildren = new NodeArray(); 615 } 616 return searchWord(childNode.mChildren, word, depth + 1, childNode); 617 } 618 619 private void runBigramReverseLookUp(final String previousWord, 620 final ArrayList<SuggestedWordInfo> suggestions) { 621 // Search for the lowercase version of the word only, because that's where bigrams 622 // store their sons. 623 final Node prevWord = searchNode(mRoots, previousWord.toLowerCase(), 0, 624 previousWord.length()); 625 if (prevWord != null && prevWord.mNGrams != null) { 626 reverseLookUp(prevWord.mNGrams, suggestions); 627 } 628 } 629 630 // Local to reverseLookUp, but do not allocate each time. 631 private final char[] mLookedUpString = new char[Constants.Dictionary.MAX_WORD_LENGTH]; 632 633 /** 634 * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words 635 * to the suggestions list passed as an argument. 636 * @param terminalNodes list of terminal nodes we want to add 637 * @param suggestions the suggestion collection to add the word to 638 */ 639 private void reverseLookUp(final LinkedList<NextWord> terminalNodes, 640 final ArrayList<SuggestedWordInfo> suggestions) { 641 Node node; 642 int freq; 643 for (NextWord nextWord : terminalNodes) { 644 node = nextWord.getWordNode(); 645 freq = nextWord.getFrequency(); 646 int index = Constants.Dictionary.MAX_WORD_LENGTH; 647 do { 648 --index; 649 mLookedUpString[index] = node.mCode; 650 node = node.mParent; 651 } while (node != null && index > 0); 652 653 // If node is null, we have a word longer than MAX_WORD_LENGTH in the dictionary. 654 // It's a little unclear how this can happen, but just in case it does it's safer 655 // to ignore the word in this case. 656 if (freq >= 0 && node == null) { 657 suggestions.add(new SuggestedWordInfo(new String(mLookedUpString, index, 658 Constants.Dictionary.MAX_WORD_LENGTH - index), 659 freq, SuggestedWordInfo.KIND_CORRECTION, mDictType)); 660 } 661 } 662 } 663 664 /** 665 * Recursively search for the terminal node of the word. 666 * 667 * One iteration takes the full word to search for and the current index of the recursion. 668 * 669 * @param children the node of the trie to search under. 670 * @param word the word to search for. Only read [offset..length] so there may be trailing chars 671 * @param offset the index in {@code word} this recursion should operate on. 672 * @param length the length of the input word. 673 * @return Returns the terminal node of the word if the word exists 674 */ 675 private Node searchNode(final NodeArray children, final CharSequence word, final int offset, 676 final int length) { 677 final int count = children.mLength; 678 final char currentChar = word.charAt(offset); 679 for (int j = 0; j < count; j++) { 680 final Node node = children.mData[j]; 681 if (node.mCode == currentChar) { 682 if (offset == length - 1) { 683 if (node.mTerminal) { 684 return node; 685 } 686 } else { 687 if (node.mChildren != null) { 688 Node returnNode = searchNode(node.mChildren, word, offset + 1, length); 689 if (returnNode != null) return returnNode; 690 } 691 } 692 } 693 } 694 return null; 695 } 696 697 protected void clearDictionary() { 698 mRoots = new NodeArray(); 699 } 700 701 private final class LoadDictionaryTask extends Thread { 702 LoadDictionaryTask() {} 703 @Override 704 public void run() { 705 loadDictionaryAsync(); 706 synchronized (mUpdatingLock) { 707 mUpdatingDictionary = false; 708 } 709 } 710 } 711 712 private static char toLowerCase(final char c) { 713 char baseChar = c; 714 if (c < BASE_CHARS.length) { 715 baseChar = BASE_CHARS[c]; 716 } 717 if (baseChar >= 'A' && baseChar <= 'Z') { 718 return (char)(baseChar | 32); 719 } else if (baseChar > 127) { 720 return Character.toLowerCase(baseChar); 721 } 722 return baseChar; 723 } 724 725 /** 726 * Table mapping most combined Latin, Greek, and Cyrillic characters 727 * to their base characters. If c is in range, BASE_CHARS[c] == c 728 * if c is not a combined character, or the base character if it 729 * is combined. 730 */ 731 private static final char BASE_CHARS[] = { 732 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 733 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 734 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 735 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 736 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 737 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 738 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 739 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 740 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 741 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 742 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 743 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 744 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 745 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 746 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 747 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 748 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 749 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 750 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 751 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 752 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 753 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 754 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 755 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 756 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 757 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 758 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 759 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 760 // Manually changed df to 73 761 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 762 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 763 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 764 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 765 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 766 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 767 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 768 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 769 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 770 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 771 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 772 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 773 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 774 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 775 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 776 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 777 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 778 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 779 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 780 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 781 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 782 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 783 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 784 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 785 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 786 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 787 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 788 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 789 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 790 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 791 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 792 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 793 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 794 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 795 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 796 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 797 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 798 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 799 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 800 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 801 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 802 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 803 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 804 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 805 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 806 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 807 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 808 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 809 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 810 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 811 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 812 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 813 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 814 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 815 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 816 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 817 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 818 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 819 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 820 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 821 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 822 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 823 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 824 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 825 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 826 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 827 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 828 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 829 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 830 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 831 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 832 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 833 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 834 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 835 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 836 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 837 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 838 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 839 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 840 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 841 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 842 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 843 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 844 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 845 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 846 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 847 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 848 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 849 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 850 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 851 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 852 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 853 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 854 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 855 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 856 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 857 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 858 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 859 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 860 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 861 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 862 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 863 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 864 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 865 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 866 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 867 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 868 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 869 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 870 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 871 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 872 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 873 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 874 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 875 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 876 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 877 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 878 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 879 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 880 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 881 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 882 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 883 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 884 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 885 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 886 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 887 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 888 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 889 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 890 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 891 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 892 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 893 }; 894 895 // generated with: 896 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 897 898 } 899