Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2010, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <cstring>
     18 
     19 #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
     20 
     21 #include "binary_format.h"
     22 #include "char_utils.h"
     23 #include "defines.h"
     24 #include "dictionary.h"
     25 #include "digraph_utils.h"
     26 #include "proximity_info.h"
     27 #include "terminal_attributes.h"
     28 #include "unigram_dictionary.h"
     29 #include "words_priority_queue.h"
     30 #include "words_priority_queue_pool.h"
     31 
     32 namespace latinime {
     33 
     34 // TODO: check the header
     35 UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags)
     36         : DICT_ROOT(streamStart), ROOT_POS(0),
     37           MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), DICT_FLAGS(dictFlags) {
     38     if (DEBUG_DICT) {
     39         AKLOGI("UnigramDictionary - constructor");
     40     }
     41 }
     42 
     43 UnigramDictionary::~UnigramDictionary() {
     44 }
     45 
     46 // TODO: This needs to take a const int* and not tinker with its contents
     47 static void addWord(int *word, int length, int probability, WordsPriorityQueue *queue, int type) {
     48     queue->push(probability, word, length, type);
     49 }
     50 
     51 // Return the replacement code point for a digraph, or 0 if none.
     52 int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize,
     53         const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
     54 
     55     // There can't be a digraph if we don't have at least 2 characters to examine
     56     if (i + 2 > inputSize) return false;
     57 
     58     // Search for the first char of some digraph
     59     int lastDigraphIndex = -1;
     60     const int thisChar = codes[i];
     61     for (lastDigraphIndex = digraphsSize - 1; lastDigraphIndex >= 0; --lastDigraphIndex) {
     62         if (thisChar == digraphs[lastDigraphIndex].first) break;
     63     }
     64     // No match: return early
     65     if (lastDigraphIndex < 0) return 0;
     66 
     67     // It's an interesting digraph if the second char matches too.
     68     if (digraphs[lastDigraphIndex].second == codes[i + 1]) {
     69         return digraphs[lastDigraphIndex].compositeGlyph;
     70     } else {
     71         return 0;
     72     }
     73 }
     74 
     75 // Mostly the same arguments as the non-recursive version, except:
     76 // codes is the original value. It points to the start of the work buffer, and gets passed as is.
     77 // inputSize is the size of the user input (thus, it is the size of codesSrc).
     78 // codesDest is the current point in the work buffer.
     79 // codesSrc is the current point in the user-input, original, content-unmodified buffer.
     80 // codesRemain is the remaining size in codesSrc.
     81 void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
     82         const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
     83         int *xCoordinatesBuffer, int *yCoordinatesBuffer,
     84         const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
     85         const bool useFullEditDistance, const int *codesSrc,
     86         const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
     87         WordsPriorityQueuePool *queuePool,
     88         const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
     89     ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
     90     ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
     91     ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
     92 
     93     const int startIndex = static_cast<int>(codesDest - codesBuffer);
     94     if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
     95         for (int i = 0; i < codesRemain; ++i) {
     96             xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
     97             yCoordinatesBuffer[startIndex + i] = ycoordinates[codesBufferSize - codesRemain + i];
     98             const int replacementCodePoint =
     99                     getDigraphReplacement(codesSrc, i, codesRemain, digraphs, digraphsSize);
    100             if (0 != replacementCodePoint) {
    101                 // Found a digraph. We will try both spellings. eg. the word is "pruefen"
    102 
    103                 // Copy the word up to the first char of the digraph, including proximity chars,
    104                 // and overwrite the primary code with the replacement code point. Then, continue
    105                 // processing on the remaining part of the word, skipping the second char of the
    106                 // digraph.
    107                 // In our example, copy "pru", replace "u" with the version with the diaeresis and
    108                 // continue running on "fen".
    109                 // Make i the index of the second char of the digraph for simplicity. Forgetting
    110                 // to do that results in an infinite recursion so take care!
    111                 ++i;
    112                 memcpy(codesDest, codesSrc, i * sizeof(codesDest[0]));
    113                 codesDest[i - 1] = replacementCodePoint;
    114                 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
    115                         codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
    116                         bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
    117                         codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
    118                         queuePool, digraphs, digraphsSize);
    119 
    120                 // Copy the second char of the digraph in place, then continue processing on
    121                 // the remaining part of the word.
    122                 // In our example, after "pru" in the buffer copy the "e", and continue on "fen"
    123                 memcpy(codesDest + i, codesSrc + i, sizeof(codesDest[0]));
    124                 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
    125                         codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
    126                         bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
    127                         currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
    128                         digraphsSize);
    129                 return;
    130             }
    131         }
    132     }
    133 
    134     // If we come here, we hit the end of the word: let's check it against the dictionary.
    135     // In our example, we'll come here once for "prufen" and then once for "pruefen".
    136     // If the word contains several digraphs, we'll come it for the product of them.
    137     // eg. if the word is "ueberpruefen" we'll test, in order, against
    138     // "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
    139     const unsigned int remainingBytes = sizeof(codesDest[0]) * codesRemain;
    140     if (0 != remainingBytes) {
    141         memcpy(codesDest, codesSrc, remainingBytes);
    142         memcpy(&xCoordinatesBuffer[startIndex], &xcoordinates[codesBufferSize - codesRemain],
    143                 sizeof(xCoordinatesBuffer[0]) * codesRemain);
    144         memcpy(&yCoordinatesBuffer[startIndex], &ycoordinates[codesBufferSize - codesRemain],
    145                 sizeof(yCoordinatesBuffer[0]) * codesRemain);
    146     }
    147 
    148     getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
    149             startIndex + codesRemain, bigramMap, bigramFilter, useFullEditDistance, correction,
    150             queuePool);
    151 }
    152 
    153 // bigramMap contains the association <bigram address> -> <bigram probability>
    154 // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
    155 // in bigram_dictionary.cpp
    156 int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
    157         const int *ycoordinates, const int *inputCodePoints, const int inputSize,
    158         const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
    159         const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
    160     WordsPriorityQueuePool queuePool(MAX_RESULTS, SUB_QUEUE_MAX_WORDS);
    161     queuePool.clearAll();
    162     Correction masterCorrection;
    163     masterCorrection.resetCorrection();
    164     const DigraphUtils::digraph_t *digraphs = 0;
    165     const int digraphsSize =
    166             DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(DICT_FLAGS, &digraphs);
    167     if (digraphsSize > 0)
    168     { // Incrementally tune the word and try all possibilities
    169         int codesBuffer[sizeof(*inputCodePoints) * inputSize];
    170         int xCoordinatesBuffer[inputSize];
    171         int yCoordinatesBuffer[inputSize];
    172         getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
    173                 xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
    174                 useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
    175                 &queuePool, digraphs, digraphsSize);
    176     } else { // Normal processing
    177         getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize,
    178                 bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
    179     }
    180 
    181     PROF_START(20);
    182     if (DEBUG_DICT) {
    183         float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
    184                 masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
    185         ns += 0;
    186         AKLOGI("Max normalized score = %f", ns);
    187     }
    188     const int suggestedWordsCount =
    189             queuePool.getMasterQueue()->outputSuggestions(masterCorrection.getPrimaryInputWord(),
    190                     inputSize, frequencies, outWords, outputTypes);
    191 
    192     if (DEBUG_DICT) {
    193         float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
    194                 masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
    195         ns += 0;
    196         AKLOGI("Returning %d words", suggestedWordsCount);
    197         /// Print the returned words
    198         for (int j = 0; j < suggestedWordsCount; ++j) {
    199             int *w = outWords + j * MAX_WORD_LENGTH;
    200             char s[MAX_WORD_LENGTH];
    201             for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
    202             (void)s; // To suppress compiler warning
    203             AKLOGI("%s %i", s, frequencies[j]);
    204         }
    205     }
    206     PROF_END(20);
    207     PROF_CLOSE;
    208     return suggestedWordsCount;
    209 }
    210 
    211 void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
    212         const int *ycoordinates, const int *inputCodePoints, const int inputSize,
    213         const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
    214         const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
    215         const {
    216     PROF_OPEN;
    217     PROF_START(0);
    218     PROF_END(0);
    219 
    220     PROF_START(1);
    221     getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, bigramMap,
    222             bigramFilter, useFullEditDistance, inputSize, correction, queuePool);
    223     PROF_END(1);
    224 
    225     PROF_START(2);
    226     // Note: This line is intentionally left blank
    227     PROF_END(2);
    228 
    229     PROF_START(3);
    230     // Note: This line is intentionally left blank
    231     PROF_END(3);
    232 
    233     PROF_START(4);
    234     bool hasAutoCorrectionCandidate = false;
    235     WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    236     if (masterQueue->size() > 0) {
    237         float nsForMaster = masterQueue->getHighestNormalizedScore(
    238                 correction->getPrimaryInputWord(), inputSize, 0, 0, 0);
    239         hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD);
    240     }
    241     PROF_END(4);
    242 
    243     PROF_START(5);
    244     // Multiple word suggestions
    245     if (SUGGEST_MULTIPLE_WORDS
    246             && inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
    247         getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints,
    248                 useFullEditDistance, inputSize, correction, queuePool,
    249                 hasAutoCorrectionCandidate);
    250     }
    251     PROF_END(5);
    252 
    253     PROF_START(6);
    254     // Note: This line is intentionally left blank
    255     PROF_END(6);
    256 
    257     if (DEBUG_DICT) {
    258         queuePool->dumpSubQueue1TopSuggestions();
    259         for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
    260             WordsPriorityQueue *queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i);
    261             if (queue->size() > 0) {
    262                 WordsPriorityQueue::SuggestedWord *sw = queue->top();
    263                 const int score = sw->mScore;
    264                 const int *word = sw->mWord;
    265                 const int wordLength = sw->mWordLength;
    266                 float ns = Correction::RankingAlgorithm::calcNormalizedScore(
    267                         correction->getPrimaryInputWord(), i, word, wordLength, score);
    268                 ns += 0;
    269                 AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
    270                         (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
    271                 DUMP_WORD(correction->getPrimaryInputWord(), i);
    272                 DUMP_WORD(word, wordLength);
    273             }
    274         }
    275     }
    276 }
    277 
    278 void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
    279         const int *yCoordinates, const int *codes, const int inputSize,
    280         Correction *correction) const {
    281     if (DEBUG_DICT) {
    282         AKLOGI("initSuggest");
    283         DUMP_WORD(codes, inputSize);
    284     }
    285     correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
    286     const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
    287     correction->initCorrection(proximityInfo, inputSize, maxDepth);
    288 }
    289 
    290 void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
    291         const int *xcoordinates, const int *ycoordinates, const int *codes,
    292         const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
    293         const bool useFullEditDistance, const int inputSize,
    294         Correction *correction, WordsPriorityQueuePool *queuePool) const {
    295     initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction);
    296     getSuggestionCandidates(useFullEditDistance, inputSize, bigramMap, bigramFilter, correction,
    297             queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
    298 }
    299 
    300 void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
    301         const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
    302         Correction *correction, WordsPriorityQueuePool *queuePool,
    303         const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const {
    304     uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
    305     if (DEBUG_DICT) {
    306         AKLOGI("Traverse count %d", totalTraverseCount);
    307     }
    308     if (totalTraverseCount > MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT) {
    309         if (DEBUG_DICT) {
    310             AKLOGI("Abort traversing %d", totalTraverseCount);
    311         }
    312         return;
    313     }
    314     // TODO: Remove setCorrectionParams
    315     correction->setCorrectionParams(0, 0, 0,
    316             -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
    317             doAutoCompletion, maxErrors);
    318     int rootPosition = ROOT_POS;
    319     // Get the number of children of root, then increment the position
    320     int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition);
    321     int outputIndex = 0;
    322 
    323     correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0));
    324 
    325     // Depth first search
    326     while (outputIndex >= 0) {
    327         if (correction->initProcessState(outputIndex)) {
    328             int siblingPos = correction->getTreeSiblingPos(outputIndex);
    329             int firstChildPos;
    330 
    331             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
    332                     bigramMap, bigramFilter, correction, &childCount, &firstChildPos, &siblingPos,
    333                     queuePool, currentWordIndex);
    334             // Update next sibling pos
    335             correction->setTreeSiblingPos(outputIndex, siblingPos);
    336 
    337             if (needsToTraverseChildrenNodes) {
    338                 // Goes to child node
    339                 outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos);
    340             }
    341         } else {
    342             // Goes to parent sibling node
    343             outputIndex = correction->getTreeParentIndex(outputIndex);
    344         }
    345     }
    346 }
    347 
    348 void UnigramDictionary::onTerminal(const int probability,
    349         const TerminalAttributes &terminalAttributes, Correction *correction,
    350         WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
    351         const int currentWordIndex) const {
    352     const int inputIndex = correction->getInputIndex();
    353     const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
    354 
    355     int wordLength;
    356     int *wordPointer;
    357 
    358     if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
    359         WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    360         const int finalProbability =
    361                 correction->getFinalProbability(probability, &wordPointer, &wordLength);
    362 
    363         if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
    364             // If the probability is 0, we don't want to add this word. However we still
    365             // want to add its shortcuts (including a possible whitelist entry) if any.
    366             // Furthermore, if this is not a word (shortcut only for example) or a blacklisted
    367             // entry then we never want to suggest this.
    368             addWord(wordPointer, wordLength, finalProbability, masterQueue,
    369                     Dictionary::KIND_CORRECTION);
    370         }
    371 
    372         const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
    373         // Please note that the shortcut candidates will be added to the master queue only.
    374         TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
    375         while (iterator.hasNextShortcutTarget()) {
    376             // TODO: addWord only supports weak ordering, meaning we have no means
    377             // to control the order of the shortcuts relative to one another or to the word.
    378             // We need to either modulate the probability of each shortcut according
    379             // to its own shortcut probability or to make the queue
    380             // so that the insert order is protected inside the queue for words
    381             // with the same score. For the moment we use -1 to make sure the shortcut will
    382             // never be in front of the word.
    383             int shortcutTarget[MAX_WORD_LENGTH];
    384             int shortcutFrequency;
    385             const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
    386                     MAX_WORD_LENGTH, shortcutTarget, &shortcutFrequency);
    387             int shortcutScore;
    388             int kind;
    389             if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
    390                     && correction->sameAsTyped()) {
    391                 shortcutScore = S_INT_MAX;
    392                 kind = Dictionary::KIND_WHITELIST;
    393             } else {
    394                 shortcutScore = shortcutProbability;
    395                 kind = Dictionary::KIND_CORRECTION;
    396             }
    397             addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
    398                     masterQueue, kind);
    399         }
    400     }
    401 
    402     // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
    403     // or more length.
    404     if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
    405         WordsPriorityQueue *subQueue;
    406         subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex);
    407         if (!subQueue) {
    408             return;
    409         }
    410         const int finalProbability = correction->getFinalProbabilityForSubQueue(
    411                 probability, &wordPointer, &wordLength, inputIndex);
    412         addWord(wordPointer, wordLength, finalProbability, subQueue, Dictionary::KIND_CORRECTION);
    413     }
    414 }
    415 
    416 int UnigramDictionary::getSubStringSuggestion(
    417         ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
    418         const int *codes, const bool useFullEditDistance, Correction *correction,
    419         WordsPriorityQueuePool *queuePool, const int inputSize,
    420         const bool hasAutoCorrectionCandidate, const int currentWordIndex,
    421         const int inputWordStartPos, const int inputWordLength,
    422         const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
    423         int *wordLengthArray, int *outputWord, int *outputWordLength) const {
    424     if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
    425         return FLAG_MULTIPLE_SUGGEST_ABORT;
    426     }
    427 
    428     /////////////////////////////////////////////
    429     // safety net for multiple word suggestion //
    430     // TODO: Remove this safety net            //
    431     /////////////////////////////////////////////
    432     int smallWordCount = 0;
    433     int singleLetterWordCount = 0;
    434     if (inputWordLength == 1) {
    435         ++singleLetterWordCount;
    436     }
    437     if (inputWordLength <= 2) {
    438         // small word == single letter or 2-letter word
    439         ++smallWordCount;
    440     }
    441     for (int i = 0; i < currentWordIndex; ++i) {
    442         const int length = wordLengthArray[i];
    443         if (length == 1) {
    444             ++singleLetterWordCount;
    445             // Safety net to avoid suggesting sequential single letter words
    446             if (i < (currentWordIndex - 1)) {
    447                 if (wordLengthArray[i + 1] == 1) {
    448                     return FLAG_MULTIPLE_SUGGEST_ABORT;
    449                 }
    450             } else if (inputWordLength == 1) {
    451                 return FLAG_MULTIPLE_SUGGEST_ABORT;
    452             }
    453         }
    454         if (length <= 2) {
    455             ++smallWordCount;
    456         }
    457         // Safety net to avoid suggesting multiple words with many (4 or more, for now) small words
    458         if (singleLetterWordCount >= 3 || smallWordCount >= 4) {
    459             return FLAG_MULTIPLE_SUGGEST_ABORT;
    460         }
    461     }
    462     //////////////////////////////////////////////
    463     // TODO: Remove the safety net above        //
    464     //////////////////////////////////////////////
    465 
    466     int *tempOutputWord = 0;
    467     int nextWordLength = 0;
    468     // TODO: Optimize init suggestion
    469     initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
    470             inputSize, correction);
    471 
    472     int word[MAX_WORD_LENGTH];
    473     int freq = getMostProbableWordLike(
    474             inputWordStartPos, inputWordLength, correction, word);
    475     if (freq > 0) {
    476         nextWordLength = inputWordLength;
    477         tempOutputWord = word;
    478     } else if (!hasAutoCorrectionCandidate) {
    479         if (inputWordStartPos > 0) {
    480             const int offset = inputWordStartPos;
    481             initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
    482                     codes + offset, inputWordLength, correction);
    483             queuePool->clearSubQueue(currentWordIndex);
    484             // TODO: pass the bigram list for substring suggestion
    485             getSuggestionCandidates(useFullEditDistance, inputWordLength,
    486                     0 /* bigramMap */, 0 /* bigramFilter */, correction, queuePool,
    487                     false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
    488             if (DEBUG_DICT) {
    489                 if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
    490                     AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
    491                     for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
    492                         queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
    493                     }
    494                 }
    495             }
    496         }
    497         WordsPriorityQueue *queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
    498         // TODO: Return the correct value depending on doAutoCompletion
    499         if (!queue || queue->size() <= 0) {
    500             return FLAG_MULTIPLE_SUGGEST_ABORT;
    501         }
    502         int score = 0;
    503         const float ns = queue->getHighestNormalizedScore(
    504                 correction->getPrimaryInputWord(), inputWordLength,
    505                 &tempOutputWord, &score, &nextWordLength);
    506         if (DEBUG_DICT) {
    507             AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
    508         }
    509         // Two words correction won't be done if the score of the first word doesn't exceed the
    510         // threshold.
    511         if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
    512                 || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
    513             return FLAG_MULTIPLE_SUGGEST_SKIP;
    514         }
    515         freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    516     }
    517     if (DEBUG_DICT) {
    518         AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)",
    519                 currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
    520                 (currentWordIndex > 0) ? wordLengthArray[0] : 0);
    521     }
    522     if (freq <= 0 || nextWordLength <= 0
    523             || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
    524         return FLAG_MULTIPLE_SUGGEST_SKIP;
    525     }
    526     for (int i = 0; i < nextWordLength; ++i) {
    527         outputWord[outputWordStartPos + i] = tempOutputWord[i];
    528     }
    529 
    530     // Put output values
    531     freqArray[currentWordIndex] = freq;
    532     // TODO: put output length instead of input length
    533     wordLengthArray[currentWordIndex] = inputWordLength;
    534     const int tempOutputWordLength = outputWordStartPos + nextWordLength;
    535     if (outputWordLength) {
    536         *outputWordLength = tempOutputWordLength;
    537     }
    538 
    539     if ((inputWordStartPos + inputWordLength) < inputSize) {
    540         if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
    541             return FLAG_MULTIPLE_SUGGEST_SKIP;
    542         }
    543         outputWord[tempOutputWordLength] = KEYCODE_SPACE;
    544         if (outputWordLength) {
    545             ++*outputWordLength;
    546         }
    547     } else if (currentWordIndex >= 1) {
    548         // TODO: Handle 3 or more words
    549         const int pairFreq = correction->getFreqForSplitMultipleWords(
    550                 freqArray, wordLengthArray, currentWordIndex + 1, isSpaceProximity, outputWord);
    551         if (DEBUG_DICT) {
    552             DUMP_WORD(outputWord, tempOutputWordLength);
    553             for (int i = 0; i < currentWordIndex + 1; ++i) {
    554                 AKLOGI("Split %d,%d words: freq = %d, length = %d", i, currentWordIndex + 1,
    555                         freqArray[i], wordLengthArray[i]);
    556             }
    557             AKLOGI("Split two words: freq = %d, length = %d, %d, isSpace ? %d", pairFreq,
    558                     inputSize, tempOutputWordLength, isSpaceProximity);
    559         }
    560         addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue(),
    561                 Dictionary::KIND_CORRECTION);
    562     }
    563     return FLAG_MULTIPLE_SUGGEST_CONTINUE;
    564 }
    565 
    566 void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
    567         const int *xcoordinates, const int *ycoordinates, const int *codes,
    568         const bool useFullEditDistance, const int inputSize, Correction *correction,
    569         WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
    570         const int startInputPos, const int startWordIndex, const int outputWordLength,
    571         int *freqArray, int *wordLengthArray, int *outputWord) const {
    572     if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
    573         // Return if the last word index
    574         return;
    575     }
    576     if (startWordIndex >= 1
    577             && (hasAutoCorrectionCandidate
    578                     || inputSize < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) {
    579         // Do not suggest 3+ words if already has auto correction candidate
    580         return;
    581     }
    582     for (int i = startInputPos + 1; i < inputSize; ++i) {
    583         if (DEBUG_CORRECTION_FREQ) {
    584             AKLOGI("Multi words(%d), start in %d sep %d start out %d",
    585                     startWordIndex, startInputPos, i, outputWordLength);
    586             DUMP_WORD(outputWord, outputWordLength);
    587         }
    588         int tempOutputWordLength = 0;
    589         // Current word
    590         int inputWordStartPos = startInputPos;
    591         int inputWordLength = i - startInputPos;
    592         const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates,
    593                 codes, useFullEditDistance, correction, queuePool, inputSize,
    594                 hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,
    595                 outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord,
    596                 &tempOutputWordLength);
    597         if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_ABORT) {
    598             // TODO: break here
    599             continue;
    600         } else if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_SKIP) {
    601             continue;
    602         }
    603 
    604         if (DEBUG_CORRECTION_FREQ) {
    605             AKLOGI("Do missing space correction");
    606         }
    607         // Next word
    608         // Missing space
    609         inputWordStartPos = i;
    610         inputWordLength = inputSize - i;
    611         if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    612                 useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
    613                 startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
    614                 false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
    615                         != FLAG_MULTIPLE_SUGGEST_CONTINUE) {
    616             getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
    617                     useFullEditDistance, inputSize, correction, queuePool,
    618                     hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,
    619                     tempOutputWordLength, freqArray, wordLengthArray, outputWord);
    620         }
    621 
    622         // Mistyped space
    623         ++inputWordStartPos;
    624         --inputWordLength;
    625 
    626         if (inputWordLength <= 0) {
    627             continue;
    628         }
    629 
    630         const int x = xcoordinates[inputWordStartPos - 1];
    631         const int y = ycoordinates[inputWordStartPos - 1];
    632         if (!proximityInfo->hasSpaceProximity(x, y)) {
    633             continue;
    634         }
    635 
    636         if (DEBUG_CORRECTION_FREQ) {
    637             AKLOGI("Do mistyped space correction");
    638         }
    639         getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    640                 useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
    641                 startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
    642                 true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0);
    643     }
    644 }
    645 
    646 void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
    647         const int *xcoordinates, const int *ycoordinates, const int *codes,
    648         const bool useFullEditDistance, const int inputSize,
    649         Correction *correction, WordsPriorityQueuePool *queuePool,
    650         const bool hasAutoCorrectionCandidate) const {
    651     if (inputSize >= MAX_WORD_LENGTH) return;
    652     if (DEBUG_DICT) {
    653         AKLOGI("--- Suggest multiple words");
    654     }
    655 
    656     // Allocating fixed length array on stack
    657     int outputWord[MAX_WORD_LENGTH];
    658     int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
    659     int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
    660     const int outputWordLength = 0;
    661     const int startInputPos = 0;
    662     const int startWordIndex = 0;
    663     getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
    664             useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate,
    665             startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
    666             outputWord);
    667 }
    668 
    669 // Wrapper for getMostProbableWordLikeInner, which matches it to the previous
    670 // interface.
    671 int UnigramDictionary::getMostProbableWordLike(const int startInputIndex, const int inputSize,
    672         Correction *correction, int *word) const {
    673     int inWord[inputSize];
    674     for (int i = 0; i < inputSize; ++i) {
    675         inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
    676     }
    677     return getMostProbableWordLikeInner(inWord, inputSize, word);
    678 }
    679 
    680 // This function will take the position of a character array within a CharGroup,
    681 // and check it actually like-matches the word in inWord starting at startInputIndex,
    682 // that is, it matches it with case and accents squashed.
    683 // The function returns true if there was a full match, false otherwise.
    684 // The function will copy on-the-fly the characters in the CharGroup to outNewWord.
    685 // It will also place the end position of the array in outPos; in outInputIndex,
    686 // it will place the index of the first char AFTER the match if there was a match,
    687 // and the initial position if there was not. It makes sense because if there was
    688 // a match we want to continue searching, but if there was not, we want to go to
    689 // the next CharGroup.
    690 // In and out parameters may point to the same location. This function takes care
    691 // not to use any input parameters after it wrote into its outputs.
    692 static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
    693         const uint8_t *const root, const int startPos, const int *const inWord,
    694         const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
    695         int *outPos) {
    696     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    697     int pos = startPos;
    698     int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
    699     int baseChar = toBaseLowerCase(codePoint);
    700     const int wChar = toBaseLowerCase(inWord[startInputIndex]);
    701 
    702     if (baseChar != wChar) {
    703         *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
    704         *outInputIndex = startInputIndex;
    705         return false;
    706     }
    707     int inputIndex = startInputIndex;
    708     outNewWord[inputIndex] = codePoint;
    709     if (hasMultipleChars) {
    710         codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
    711         while (NOT_A_CODE_POINT != codePoint) {
    712             baseChar = toBaseLowerCase(codePoint);
    713             if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
    714                 *outPos = BinaryFormat::skipOtherCharacters(root, pos);
    715                 *outInputIndex = startInputIndex;
    716                 return false;
    717             }
    718             outNewWord[inputIndex] = codePoint;
    719             codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
    720         }
    721     }
    722     *outInputIndex = inputIndex + 1;
    723     *outPos = pos;
    724     return true;
    725 }
    726 
    727 // This function is invoked when a word like the word searched for is found.
    728 // It will compare the probability to the max probability, and if greater, will
    729 // copy the word into the output buffer. In output value maxFreq, it will
    730 // write the new maximum probability if it changed.
    731 static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
    732         int *maxFreq) {
    733     if (freq > *maxFreq) {
    734         for (int q = 0; q < length; ++q) {
    735             outWord[q] = newWord[q];
    736         }
    737         outWord[length] = 0;
    738         *maxFreq = freq;
    739     }
    740 }
    741 
    742 // Will find the highest probability of the words like the one passed as an argument,
    743 // that is, everything that only differs by case/accents.
    744 int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
    745         int *outWord) const {
    746     int newWord[MAX_WORD_LENGTH];
    747     int depth = 0;
    748     int maxFreq = -1;
    749     const uint8_t *const root = DICT_ROOT;
    750     int stackChildCount[MAX_WORD_LENGTH];
    751     int stackInputIndex[MAX_WORD_LENGTH];
    752     int stackSiblingPos[MAX_WORD_LENGTH];
    753 
    754     int startPos = 0;
    755     stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
    756     stackInputIndex[0] = 0;
    757     stackSiblingPos[0] = startPos;
    758     while (depth >= 0) {
    759         const int charGroupCount = stackChildCount[depth];
    760         int pos = stackSiblingPos[depth];
    761         for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) {
    762             int inputIndex = stackInputIndex[depth];
    763             const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
    764             // Test whether all chars in this group match with the word we are searching for. If so,
    765             // we want to traverse its children (or if the inputSize match, evaluate its
    766             // probability). Note that this function will output the position regardless, but will
    767             // only write into inputIndex if there is a match.
    768             const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
    769                     inputIndex, inputSize, newWord, &inputIndex, &pos);
    770             if (isAlike && (!(BinaryFormat::FLAG_IS_NOT_A_WORD & flags))
    771                     && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
    772                 const int probability =
    773                         BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
    774                 onTerminalWordLike(probability, newWord, inputIndex, outWord, &maxFreq);
    775             }
    776             pos = BinaryFormat::skipProbability(flags, pos);
    777             const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
    778             const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
    779             // If we had a match and the word has children, we want to traverse them. We don't have
    780             // to traverse words longer than the one we are searching for, since they will not match
    781             // anyway, so don't traverse unless inputIndex < inputSize.
    782             if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
    783                 // Save position for this depth, to get back to this once children are done
    784                 stackChildCount[depth] = charGroupIndex;
    785                 stackSiblingPos[depth] = siblingPos;
    786                 // Prepare stack values for next depth
    787                 ++depth;
    788                 int childrenPos = childrenNodePos;
    789                 stackChildCount[depth] =
    790                         BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos);
    791                 stackSiblingPos[depth] = childrenPos;
    792                 stackInputIndex[depth] = inputIndex;
    793                 pos = childrenPos;
    794                 // Go to the next depth level.
    795                 ++depth;
    796                 break;
    797             } else {
    798                 // No match, or no children, or word too long to ever match: go the next sibling.
    799                 pos = siblingPos;
    800             }
    801         }
    802         --depth;
    803     }
    804     return maxFreq;
    805 }
    806 
    807 int UnigramDictionary::getProbability(const int *const inWord, const int length) const {
    808     const uint8_t *const root = DICT_ROOT;
    809     int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
    810             false /* forceLowerCaseSearch */);
    811     if (NOT_VALID_WORD == pos) {
    812         return NOT_A_PROBABILITY;
    813     }
    814     const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
    815     if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
    816         // If this is not a word, or if it's a blacklisted entry, it should behave as
    817         // having no probability outside of the suggestion process (where it should be used
    818         // for shortcuts).
    819         return NOT_A_PROBABILITY;
    820     }
    821     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    822     if (hasMultipleChars) {
    823         pos = BinaryFormat::skipOtherCharacters(root, pos);
    824     } else {
    825         BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
    826     }
    827     const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
    828     return unigramProbability;
    829 }
    830 
    831 // TODO: remove this function.
    832 int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
    833     return -1;
    834 }
    835 
    836 // ProcessCurrentNode returns a boolean telling whether to traverse children nodes or not.
    837 // If the return value is false, then the caller should read in the output "nextSiblingPosition"
    838 // to find out the address of the next sibling node and pass it to a new call of processCurrentNode.
    839 // It is worthy to note that when false is returned, the output values other than
    840 // nextSiblingPosition are undefined.
    841 // If the return value is true, then the caller must proceed to traverse the children of this
    842 // node. processCurrentNode will output the information about the children: their count in
    843 // newCount, their position in newChildrenPosition, the traverseAllNodes flag in
    844 // newTraverseAllNodes, the match weight into newMatchRate, the input index into newInputIndex, the
    845 // diffs into newDiffs, the sibling position in nextSiblingPosition, and the output index into
    846 // newOutputIndex. Please also note the following caveat: processCurrentNode does not know when
    847 // there aren't any more nodes at this level, it merely returns the address of the first byte after
    848 // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
    849 // given level, as output into newCount when traversing this level's parent.
    850 bool UnigramDictionary::processCurrentNode(const int initialPos,
    851         const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
    852         int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
    853         WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
    854     if (DEBUG_DICT) {
    855         correction->checkState();
    856     }
    857     int pos = initialPos;
    858 
    859     // Flags contain the following information:
    860     // - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits:
    861     //   - FLAG_GROUP_ADDRESS_TYPE_{ONE,TWO,THREE}_BYTES means there are children and their address
    862     //     is on the specified number of bytes.
    863     //   - FLAG_GROUP_ADDRESS_TYPE_NOADDRESS means there are no children, and therefore no address.
    864     // - FLAG_HAS_MULTIPLE_CHARS: whether this node has multiple char or not.
    865     // - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children)
    866     // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
    867     const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos);
    868     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    869     const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
    870 
    871     bool needsToInvokeOnTerminal = false;
    872 
    873     // This gets only ONE character from the stream. Next there will be:
    874     // if FLAG_HAS_MULTIPLE CHARS: the other characters of the same node
    875     // else if FLAG_IS_TERMINAL: the probability
    876     // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
    877     // Note that you can't have a node that both is not a terminal and has no children.
    878     int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
    879     ASSERT(NOT_A_CODE_POINT != c);
    880 
    881     // We are going to loop through each character and make it look like it's a different
    882     // node each time. To do that, we will process characters in this node in order until
    883     // we find the character terminator. This is signalled by getCodePoint* returning
    884     // NOT_A_CODE_POINT.
    885     // As a special case, if there is only one character in this node, we must not read the
    886     // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
    887     // This way, each loop run will look like a "virtual node".
    888     do {
    889         // We prefetch the next char. If 'c' is the last char of this node, we will have
    890         // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
    891         // should behave as a terminal or not and whether we have children.
    892         const int nextc = hasMultipleChars
    893                 ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
    894         const bool isLastChar = (NOT_A_CODE_POINT == nextc);
    895         // If there are more chars in this nodes, then this virtual node is not a terminal.
    896         // If we are on the last char, this virtual node is a terminal if this node is.
    897         const bool isTerminal = isLastChar && isTerminalNode;
    898 
    899         Correction::CorrectionType stateType = correction->processCharAndCalcState(
    900                 c, isTerminal);
    901         if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
    902                 || stateType == Correction::ON_TERMINAL) {
    903             needsToInvokeOnTerminal = true;
    904         } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
    905             // We found that this is an unrelated character, so we should give up traversing
    906             // this node and its children entirely.
    907             // However we may not be on the last virtual node yet so we skip the remaining
    908             // characters in this node, the probability if it's there, read the next sibling
    909             // position to output it, then return false.
    910             // We don't have to output other values because we return false, as in
    911             // "don't traverse children".
    912             if (!isLastChar) {
    913                 pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos);
    914             }
    915             pos = BinaryFormat::skipProbability(flags, pos);
    916             *nextSiblingPosition =
    917                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
    918             return false;
    919         }
    920 
    921         // Prepare for the next character. Promote the prefetched char to current char - the loop
    922         // will take care of prefetching the next. If we finally found our last char, nextc will
    923         // contain NOT_A_CODE_POINT.
    924         c = nextc;
    925     } while (NOT_A_CODE_POINT != c);
    926 
    927     if (isTerminalNode) {
    928         // The probability should be here, because we come here only if this is actually
    929         // a terminal node, and we are on its last char.
    930         const int unigramProbability =
    931                 BinaryFormat::readProbabilityWithoutMovingPointer(DICT_ROOT, pos);
    932         const int childrenAddressPos = BinaryFormat::skipProbability(flags, pos);
    933         const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
    934         TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
    935         // bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
    936         // bigramFilter is a bloom filter of said frequencies for even faster rejection.
    937         const int probability = BinaryFormat::getProbability(initialPos, bigramMap, bigramFilter,
    938                 unigramProbability);
    939         onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
    940                 currentWordIndex);
    941 
    942         // If there are more chars in this node, then this virtual node has children.
    943         // If we are on the last char, this virtual node has children if this node has.
    944         const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
    945 
    946         // This character matched the typed character (enough to traverse the node at least)
    947         // so we just evaluated it. Now we should evaluate this virtual node's children - that
    948         // is, if it has any. If it has no children, we're done here - so we skip the end of
    949         // the node, output the siblings position, and return false "don't traverse children".
    950         // Note that !hasChildren implies isLastChar, so we know we don't have to skip any
    951         // remaining char in this group for there can't be any.
    952         if (!hasChildren) {
    953             pos = BinaryFormat::skipProbability(flags, pos);
    954             *nextSiblingPosition =
    955                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
    956             return false;
    957         }
    958 
    959         // Optimization: Prune out words that are too long compared to how much was typed.
    960         if (correction->needsToPrune()) {
    961             pos = BinaryFormat::skipProbability(flags, pos);
    962             *nextSiblingPosition =
    963                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
    964             if (DEBUG_DICT_FULL) {
    965                 AKLOGI("Traversing was pruned.");
    966             }
    967             return false;
    968         }
    969     }
    970 
    971     // Now we finished processing this node, and we want to traverse children. If there are no
    972     // children, we can't come here.
    973     ASSERT(BinaryFormat::hasChildrenInFlags(flags));
    974 
    975     // If this node was a terminal it still has the probability under the pointer (it may have been
    976     // read, but not skipped - see readProbabilityWithoutMovingPointer).
    977     // Next come the children position, then possibly attributes (attributes are bigrams only for
    978     // now, maybe something related to shortcuts in the future).
    979     // Once this is read, we still need to output the number of nodes in the immediate children of
    980     // this node, so we read and output it before returning true, as in "please traverse children".
    981     pos = BinaryFormat::skipProbability(flags, pos);
    982     int childrenPos = BinaryFormat::readChildrenPosition(DICT_ROOT, flags, pos);
    983     *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
    984     *newCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &childrenPos);
    985     *newChildrenPosition = childrenPos;
    986     return true;
    987 }
    988 } // namespace latinime
    989