Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/core/suggest.h"
     18 
     19 #include "suggest/core/dicnode/dic_node.h"
     20 #include "suggest/core/dicnode/dic_node_priority_queue.h"
     21 #include "suggest/core/dicnode/dic_node_vector.h"
     22 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
     23 #include "suggest/core/dictionary/dictionary.h"
     24 #include "suggest/core/dictionary/digraph_utils.h"
     25 #include "suggest/core/dictionary/shortcut_utils.h"
     26 #include "suggest/core/layout/proximity_info.h"
     27 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
     28 #include "suggest/core/policy/scoring.h"
     29 #include "suggest/core/policy/traversal.h"
     30 #include "suggest/core/policy/weighting.h"
     31 #include "suggest/core/session/dic_traverse_session.h"
     32 
     33 namespace latinime {
     34 
     35 // Initialization of class constants.
     36 const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
     37 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
     38 const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
     39 
     40 /**
     41  * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
     42  * whether to prematurely commit the suggested words up to the given point for sentence-level
     43  * suggestion.
     44  *
     45  * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
     46  * automatically activated for sequential calls that share the same starting input.
     47  * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
     48  */
     49 int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
     50         int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
     51         int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices,
     52         int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
     53     PROF_OPEN;
     54     PROF_START(0);
     55     const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
     56     DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
     57     tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
     58             pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
     59     // TODO: Add the way to evaluate cache
     60 
     61     initializeSearch(tSession, commitPoint);
     62     PROF_END(0);
     63     PROF_START(1);
     64 
     65     // keep expanding search dicNodes until all have terminated.
     66     while (tSession->getDicTraverseCache()->activeSize() > 0) {
     67         expandCurrentDicNodes(tSession);
     68         tSession->getDicTraverseCache()->advanceActiveDicNodes();
     69         tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
     70     }
     71     PROF_END(1);
     72     PROF_START(2);
     73     const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
     74             outputAutoCommitFirstWordConfidence);
     75     PROF_END(2);
     76     PROF_CLOSE;
     77     return size;
     78 }
     79 
     80 /**
     81  * Initializes the search at the root of the lexicon trie. Note that when possible the search will
     82  * continue suggestion from where it left off during the last call.
     83  */
     84 void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const {
     85     if (!traverseSession->getProximityInfoState(0)->isUsed()) {
     86         return;
     87     }
     88 
     89     // Never auto partial commit for now.
     90     commitPoint = 0;
     91 
     92     if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
     93             && traverseSession->isContinuousSuggestionPossible()) {
     94         if (commitPoint == 0) {
     95             // Continue suggestion
     96             traverseSession->getDicTraverseCache()->continueSearch();
     97         } else {
     98             // Continue suggestion after partial commit.
     99             DicNode *topDicNode =
    100                     traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
    101             traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
    102             traverseSession->getDicTraverseCache()->continueSearch();
    103             traverseSession->setPartiallyCommited();
    104         }
    105     } else {
    106         // Restart recognition at the root.
    107         traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
    108                 MAX_RESULTS);
    109         // Create a new dic node here
    110         DicNode rootNode;
    111         DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
    112                 traverseSession->getPrevWordPos(), &rootNode);
    113         traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
    114     }
    115 }
    116 
    117 /**
    118  * Outputs the final list of suggestions (i.e., terminal nodes).
    119  */
    120 int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
    121         int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
    122         int *outputAutoCommitFirstWordConfidence) const {
    123 #if DEBUG_EVALUATE_MOST_PROBABLE_STRING
    124     const int terminalSize = 0;
    125 #else
    126     const int terminalSize = min(MAX_RESULTS,
    127             static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
    128 #endif
    129     DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
    130 
    131     for (int index = terminalSize - 1; index >= 0; --index) {
    132         traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
    133     }
    134 
    135     const float languageWeight = SCORING->getAdjustedLanguageWeight(
    136             traverseSession, terminals, terminalSize);
    137 
    138     int outputWordIndex = 0;
    139     // Insert most probable word at index == 0 as long as there is one terminal at least
    140     const bool hasMostProbableString =
    141             SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
    142                     &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
    143     if (hasMostProbableString) {
    144         outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
    145         ++outputWordIndex;
    146     }
    147 
    148     // Initial value of the loop index for terminal nodes (words)
    149     int doubleLetterTerminalIndex = -1;
    150     DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
    151     SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
    152             &doubleLetterTerminalIndex, &doubleLetterLevel);
    153 
    154     int maxScore = S_INT_MIN;
    155     // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
    156     // a long multiple words suggestion.
    157     // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
    158     // traverseSession->isPartiallyCommited() always returns false because we never auto partial
    159     // commit for now.
    160     const bool forceCommitMultiWords = (terminalSize > 0) ?
    161             TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
    162                     && (traverseSession->isPartiallyCommited()
    163                             || (traverseSession->getInputSize()
    164                                     >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
    165                                             && terminals[0].hasMultipleWords())) : false;
    166     // TODO: have partial commit work even with multiple pointers.
    167     const bool outputSecondWordFirstLetterInputIndex =
    168             traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
    169     if (terminalSize > 0) {
    170         // If we have no suggestions, don't write this
    171         outputAutoCommitFirstWordConfidence[0] =
    172                 computeFirstWordConfidence(&terminals[0]);
    173     }
    174 
    175     // Output suggestion results here
    176     for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
    177             ++terminalIndex) {
    178         DicNode *terminalDicNode = &terminals[terminalIndex];
    179         if (DEBUG_GEO_FULL) {
    180             terminalDicNode->dump("OUT:");
    181         }
    182         const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
    183                 terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
    184         const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
    185                 + doubleLetterCost;
    186         const bool isPossiblyOffensiveWord =
    187                 traverseSession->getDictionaryStructurePolicy()->getProbability(
    188                         terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
    189         const bool isExactMatch = terminalDicNode->isExactMatch();
    190         const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
    191         // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
    192         // (e.g. "AMD" and "and")
    193         const bool isSafeExactMatch = isExactMatch
    194                 && !(isPossiblyOffensiveWord && isFirstCharUppercase);
    195         const int outputTypeFlags =
    196                 (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
    197                 | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
    198 
    199         // Entries that are blacklisted or do not represent a word should not be output.
    200         const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
    201 
    202         // Increase output score of top typing suggestion to ensure autocorrection.
    203         // TODO: Better integration with java side autocorrection logic.
    204         const int finalScore = SCORING->calculateFinalScore(
    205                 compoundDistance, traverseSession->getInputSize(),
    206                 terminalDicNode->isExactMatch()
    207                         || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
    208                                 || (isValidWord && SCORING->doesAutoCorrectValidWord()));
    209         if (maxScore < finalScore && isValidWord) {
    210             maxScore = finalScore;
    211         }
    212 
    213         // Don't output invalid words. However, we still need to submit their shortcuts if any.
    214         if (isValidWord) {
    215             outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
    216             frequencies[outputWordIndex] = finalScore;
    217             if (outputSecondWordFirstLetterInputIndex) {
    218                 outputIndicesToPartialCommit[outputWordIndex] =
    219                         terminalDicNode->getSecondWordFirstInputIndex(
    220                                 traverseSession->getProximityInfoState(0));
    221             } else {
    222                 outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
    223             }
    224             // Populate the outputChars array with the suggested word.
    225             const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
    226             terminalDicNode->outputResult(&outputCodePoints[startIndex]);
    227             ++outputWordIndex;
    228         }
    229 
    230         if (!terminalDicNode->hasMultipleWords()) {
    231             BinaryDictionaryShortcutIterator shortcutIt(
    232                     traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
    233                     traverseSession->getDictionaryStructurePolicy()
    234                             ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
    235             // Shortcut is not supported for multiple words suggestions.
    236             // TODO: Check shortcuts during traversal for multiple words suggestions.
    237             const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
    238             const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
    239                     outputWordIndex,  finalScore, outputCodePoints, frequencies, outputTypes,
    240                     sameAsTyped);
    241             const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
    242                     traverseSession->getProximityInfoState(0));
    243             for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
    244                 if (outputSecondWordFirstLetterInputIndex) {
    245                     outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
    246                 } else {
    247                     outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
    248                 }
    249             }
    250             outputWordIndex = updatedOutputWordIndex;
    251         }
    252         DicNode::managedDelete(terminalDicNode);
    253     }
    254 
    255     if (hasMostProbableString) {
    256         SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
    257                 &outputCodePoints[0], &frequencies[0]);
    258     }
    259     return outputWordIndex;
    260 }
    261 
    262 int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
    263     // Get the number of spaces in the first suggestion
    264     const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    265     // Get the number of characters in the first suggestion
    266     const int length = terminalDicNode->getTotalNodeCodePointCount();
    267     // Get the distance for the first word of the suggestion
    268     const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
    269 
    270     // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    271     // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    272     // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    273     // we are very confident.
    274     // Expected space count is 1 ~ 5
    275     static const int MIN_EXPECTED_SPACE_COUNT = 1;
    276     static const int MAX_EXPECTED_SPACE_COUNT = 5;
    277     // Expected length is about 4 ~ 30
    278     static const int MIN_EXPECTED_LENGTH = 4;
    279     static const int MAX_EXPECTED_LENGTH = 30;
    280     // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    281     static const float MIN_EXPECTED_DISTANCE = 0.0;
    282     static const float MAX_EXPECTED_DISTANCE = 2.0;
    283     // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    284     // outside these values. We want to output a value that reflects all of these. Each factor
    285     // contributes a bit.
    286 
    287     // We need at least a space.
    288     if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
    289 
    290     // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
    291     // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
    292     // weight of the distance. Clamp to avoid overflows.
    293     const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
    294             : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    295     const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
    296             * (MAX_EXPECTED_DISTANCE - clampedDistance)
    297             / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    298     // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    299     // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    300     // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    301     const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
    302             * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    303     // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    304     // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    305     // weight of the space count.
    306     const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
    307             * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
    308             / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
    309 
    310     return distanceContribution + lengthContribution + spaceContribution;
    311 }
    312 
    313 /**
    314  * Expands the dicNodes in the current search priority queue by advancing to the possible child
    315  * nodes based on the next touch point(s) (or no touch points for lookahead)
    316  */
    317 void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
    318     const int inputSize = traverseSession->getInputSize();
    319     DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
    320     DicNode correctionDicNode;
    321 
    322     // TODO: Find more efficient caching
    323     const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
    324     if (shouldDepthLevelCache) {
    325         traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
    326     }
    327     if (DEBUG_CACHE) {
    328         AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
    329                 shouldDepthLevelCache, inputSize);
    330     }
    331     while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
    332         DicNode dicNode;
    333         traverseSession->getDicTraverseCache()->popActive(&dicNode);
    334         if (dicNode.isTotalInputSizeExceedingLimit()) {
    335             return;
    336         }
    337         childDicNodes.clear();
    338         const int point0Index = dicNode.getInputIndex(0);
    339         const bool canDoLookAheadCorrection =
    340                 TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
    341         const bool isLookAheadCorrection = canDoLookAheadCorrection
    342                 && traverseSession->getDicTraverseCache()->
    343                         isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
    344         const bool isCompletion = dicNode.isCompletion(inputSize);
    345 
    346         const bool shouldNodeLevelCache =
    347                 TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
    348         if (shouldDepthLevelCache || shouldNodeLevelCache) {
    349             if (DEBUG_CACHE) {
    350                 dicNode.dump("PUSH_CACHE");
    351             }
    352             traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
    353             dicNode.setCached();
    354         }
    355 
    356         if (dicNode.isInDigraph()) {
    357             // Finish digraph handling if the node is in the middle of a digraph expansion.
    358             processDicNodeAsDigraph(traverseSession, &dicNode);
    359         } else if (isLookAheadCorrection) {
    360             // The algorithm maintains a small set of "deferred" nodes that have not consumed the
    361             // latest touch point yet. These are needed to apply look-ahead correction operations
    362             // that require special handling of the latest touch point. For example, with insertions
    363             // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all.
    364             processDicNodeAsTransposition(traverseSession, &dicNode);
    365             processDicNodeAsInsertion(traverseSession, &dicNode);
    366         } else { // !isLookAheadCorrection
    367             // Only consider typing error corrections if the normalized compound distance is
    368             // below a spatial distance threshold.
    369             // NOTE: the threshold may need to be updated if scoring model changes.
    370             // TODO: Remove. Do not prune node here.
    371             const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
    372             // Process for handling space substitution (e.g., hevis => he is)
    373             if (allowsErrorCorrections
    374                     && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
    375                 createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
    376             }
    377 
    378             DicNodeUtils::getAllChildDicNodes(
    379                     &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
    380 
    381             const int childDicNodesSize = childDicNodes.getSizeAndLock();
    382             for (int i = 0; i < childDicNodesSize; ++i) {
    383                 DicNode *const childDicNode = childDicNodes[i];
    384                 if (isCompletion) {
    385                     // Handle forward lookahead when the lexicon letter exceeds the input size.
    386                     processDicNodeAsMatch(traverseSession, childDicNode);
    387                     continue;
    388                 }
    389                 if (DigraphUtils::hasDigraphForCodePoint(
    390                         traverseSession->getDictionaryStructurePolicy()
    391                                 ->getHeaderStructurePolicy(),
    392                         childDicNode->getNodeCodePoint())) {
    393                     correctionDicNode.initByCopy(childDicNode);
    394                     correctionDicNode.advanceDigraphIndex();
    395                     processDicNodeAsDigraph(traverseSession, &correctionDicNode);
    396                 }
    397                 if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
    398                         allowsErrorCorrections)) {
    399                     // TODO: (Gesture) Change weight between omission and substitution errors
    400                     // TODO: (Gesture) Terminal node should not be handled as omission
    401                     correctionDicNode.initByCopy(childDicNode);
    402                     processDicNodeAsOmission(traverseSession, &correctionDicNode);
    403                 }
    404                 const ProximityType proximityType = TRAVERSAL->getProximityType(
    405                         traverseSession, &dicNode, childDicNode);
    406                 switch (proximityType) {
    407                     // TODO: Consider the difference of proximityType here
    408                     case MATCH_CHAR:
    409                     case PROXIMITY_CHAR:
    410                         processDicNodeAsMatch(traverseSession, childDicNode);
    411                         break;
    412                     case ADDITIONAL_PROXIMITY_CHAR:
    413                         if (allowsErrorCorrections) {
    414                             processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
    415                                     childDicNode);
    416                         }
    417                         break;
    418                     case SUBSTITUTION_CHAR:
    419                         if (allowsErrorCorrections) {
    420                             processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode);
    421                         }
    422                         break;
    423                     case UNRELATED_CHAR:
    424                         // Just drop this node and do nothing.
    425                         break;
    426                     default:
    427                         // Just drop this node and do nothing.
    428                         break;
    429                 }
    430             }
    431 
    432             // Push the node for look-ahead correction
    433             if (allowsErrorCorrections && canDoLookAheadCorrection) {
    434                 traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
    435             }
    436         }
    437     }
    438 }
    439 
    440 void Suggest::processTerminalDicNode(
    441         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    442     if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    443         return;
    444     }
    445     if (!dicNode->isTerminalWordNode()) {
    446         return;
    447     }
    448     if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
    449         return;
    450     }
    451     // Create a non-cached node here.
    452     DicNode terminalDicNode;
    453     DicNodeUtils::initByCopy(dicNode, &terminalDicNode);
    454     if (TRAVERSAL->needsToTraverseAllUserInput()
    455             && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
    456         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
    457                 &terminalDicNode, traverseSession->getMultiBigramMap());
    458     }
    459     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0,
    460             &terminalDicNode, traverseSession->getMultiBigramMap());
    461     traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
    462 }
    463 
    464 /**
    465  * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
    466  * (by the space omission error correction) search path if input dicNode is on a terminal node.
    467  */
    468 void Suggest::processExpandedDicNode(
    469         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    470     processTerminalDicNode(traverseSession, dicNode);
    471     if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    472         if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
    473             createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
    474         }
    475         const int allowsLookAhead = !(dicNode->hasMultipleWords()
    476                 && dicNode->isCompletion(traverseSession->getInputSize()));
    477         if (dicNode->hasChildren() && allowsLookAhead) {
    478             traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
    479         }
    480     }
    481     DicNode::managedDelete(dicNode);
    482 }
    483 
    484 void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
    485         DicNode *childDicNode) const {
    486     weightChildNode(traverseSession, childDicNode);
    487     processExpandedDicNode(traverseSession, childDicNode);
    488 }
    489 
    490 void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
    491         DicNode *dicNode, DicNode *childDicNode) const {
    492     // Note: Most types of corrections don't need to look up the bigram information since they do
    493     // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
    494     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
    495             traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
    496     weightChildNode(traverseSession, childDicNode);
    497     processExpandedDicNode(traverseSession, childDicNode);
    498 }
    499 
    500 void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
    501         DicNode *dicNode, DicNode *childDicNode) const {
    502     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
    503             dicNode, childDicNode, 0 /* multiBigramMap */);
    504     weightChildNode(traverseSession, childDicNode);
    505     processExpandedDicNode(traverseSession, childDicNode);
    506 }
    507 
    508 // Process the node codepoint as a digraph. This means that composite glyphs like the German
    509 // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
    510 // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
    511 void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
    512         DicNode *childDicNode) const {
    513     weightChildNode(traverseSession, childDicNode);
    514     childDicNode->advanceDigraphIndex();
    515     processExpandedDicNode(traverseSession, childDicNode);
    516 }
    517 
    518 /**
    519  * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
    520  * matches for all possible next letters. Note that just skipping the current letter without any
    521  * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
    522  * the possible *next* letters after the omission to better limit search to plausible omissions.
    523  * Note that apostrophes are handled as omissions.
    524  */
    525 void Suggest::processDicNodeAsOmission(
    526         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    527     DicNodeVector childDicNodes;
    528     DicNodeUtils::getAllChildDicNodes(
    529             dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
    530 
    531     const int size = childDicNodes.getSizeAndLock();
    532     for (int i = 0; i < size; i++) {
    533         DicNode *const childDicNode = childDicNodes[i];
    534         // Treat this word as omission
    535         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
    536                 dicNode, childDicNode, 0 /* multiBigramMap */);
    537         weightChildNode(traverseSession, childDicNode);
    538         if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
    539             continue;
    540         }
    541         processExpandedDicNode(traverseSession, childDicNode);
    542     }
    543 }
    544 
    545 /**
    546  * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and
    547  * consider matches for the next touch point.
    548  */
    549 void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
    550         DicNode *dicNode) const {
    551     const int16_t pointIndex = dicNode->getInputIndex(0);
    552     DicNodeVector childDicNodes;
    553     DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
    554             &childDicNodes);
    555     const int size = childDicNodes.getSizeAndLock();
    556     for (int i = 0; i < size; i++) {
    557         if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
    558                 != childDicNodes[i]->getNodeCodePoint()) {
    559             continue;
    560         }
    561         DicNode *const childDicNode = childDicNodes[i];
    562         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
    563                 dicNode, childDicNode, 0 /* multiBigramMap */);
    564         processExpandedDicNode(traverseSession, childDicNode);
    565     }
    566 }
    567 
    568 /**
    569  * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points.
    570  */
    571 void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
    572         DicNode *dicNode) const {
    573     const int16_t pointIndex = dicNode->getInputIndex(0);
    574     DicNodeVector childDicNodes1;
    575     DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
    576             &childDicNodes1);
    577     const int childSize1 = childDicNodes1.getSizeAndLock();
    578     for (int i = 0; i < childSize1; i++) {
    579         const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
    580                 ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
    581                         true /* checkProximityChars */);
    582         if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
    583             continue;
    584         }
    585         if (childDicNodes1[i]->hasChildren()) {
    586             DicNodeVector childDicNodes2;
    587             DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
    588                     traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
    589             const int childSize2 = childDicNodes2.getSizeAndLock();
    590             for (int j = 0; j < childSize2; j++) {
    591                 DicNode *const childDicNode2 = childDicNodes2[j];
    592                 const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
    593                         ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
    594                                 true /* checkProximityChars */);
    595                 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
    596                     continue;
    597                 }
    598                 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
    599                         traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
    600                 processExpandedDicNode(traverseSession, childDicNode2);
    601             }
    602         }
    603         DicNode::managedDelete(childDicNodes1[i]);
    604     }
    605 }
    606 
    607 /**
    608  * Weight child node by aligning it to the key
    609  */
    610 void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
    611     const int inputSize = traverseSession->getInputSize();
    612     if (dicNode->isCompletion(inputSize)) {
    613         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession,
    614                 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    615     } else { // completion
    616         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession,
    617                 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    618     }
    619 }
    620 
    621 /**
    622  * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
    623  * incorporates the unigram / bigram score for the ending word into the new dicNode.
    624  */
    625 void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
    626         const bool spaceSubstitution) const {
    627     if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
    628         return;
    629     }
    630 
    631     // Create a non-cached node here.
    632     DicNode newDicNode;
    633     DicNodeUtils::initAsRootWithPreviousWord(
    634             traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
    635     const CorrectionType correctionType = spaceSubstitution ?
    636             CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
    637     Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
    638             &newDicNode, traverseSession->getMultiBigramMap());
    639     if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    640         // newDicNode is worth continuing to traverse.
    641         // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
    642         // here because here is not the right place to do pruning. Pruning should take place only
    643         // in DicNodePriorityQueue.
    644         traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
    645     }
    646 }
    647 } // namespace latinime
    648