Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/core/suggest.h"
     18 
     19 #include "suggest/core/dicnode/dic_node.h"
     20 #include "suggest/core/dicnode/dic_node_priority_queue.h"
     21 #include "suggest/core/dicnode/dic_node_vector.h"
     22 #include "suggest/core/dictionary/dictionary.h"
     23 #include "suggest/core/dictionary/digraph_utils.h"
     24 #include "suggest/core/layout/proximity_info.h"
     25 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
     26 #include "suggest/core/policy/traversal.h"
     27 #include "suggest/core/policy/weighting.h"
     28 #include "suggest/core/result/suggestions_output_utils.h"
     29 #include "suggest/core/session/dic_traverse_session.h"
     30 
     31 namespace latinime {
     32 
     33 // Initialization of class constants.
     34 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
     35 
     36 /**
     37  * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
     38  * whether to prematurely commit the suggested words up to the given point for sentence-level
     39  * suggestion.
     40  *
     41  * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
     42  * automatically activated for sequential calls that share the same starting input.
     43  * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
     44  */
     45 void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
     46         int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
     47         int inputSize, const float languageWeight,
     48         SuggestionResults *const outSuggestionResults) const {
     49     PROF_OPEN;
     50     PROF_START(0);
     51     const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
     52     DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
     53     tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
     54             pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
     55     // TODO: Add the way to evaluate cache
     56 
     57     initializeSearch(tSession);
     58     PROF_END(0);
     59     PROF_START(1);
     60 
     61     // keep expanding search dicNodes until all have terminated.
     62     while (tSession->getDicTraverseCache()->activeSize() > 0) {
     63         expandCurrentDicNodes(tSession);
     64         tSession->getDicTraverseCache()->advanceActiveDicNodes();
     65         tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
     66     }
     67     PROF_END(1);
     68     PROF_START(2);
     69     SuggestionsOutputUtils::outputSuggestions(
     70             SCORING, tSession, languageWeight, outSuggestionResults);
     71     PROF_END(2);
     72     PROF_CLOSE;
     73 }
     74 
     75 /**
     76  * Initializes the search at the root of the lexicon trie. Note that when possible the search will
     77  * continue suggestion from where it left off during the last call.
     78  */
     79 void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
     80     if (!traverseSession->getProximityInfoState(0)->isUsed()) {
     81         return;
     82     }
     83 
     84     if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
     85             && traverseSession->isContinuousSuggestionPossible()) {
     86         // Continue suggestion
     87         traverseSession->getDicTraverseCache()->continueSearch();
     88     } else {
     89         // Restart recognition at the root.
     90         traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
     91                 TRAVERSAL->getTerminalCacheSize());
     92         // Create a new dic node here
     93         DicNode rootNode;
     94         DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
     95                 traverseSession->getPrevWordsPtNodePos(), &rootNode);
     96         traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
     97     }
     98 }
     99 
    100 /**
    101  * Expands the dicNodes in the current search priority queue by advancing to the possible child
    102  * nodes based on the next touch point(s) (or no touch points for lookahead)
    103  */
    104 void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
    105     const int inputSize = traverseSession->getInputSize();
    106     DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
    107     DicNode correctionDicNode;
    108 
    109     // TODO: Find more efficient caching
    110     const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
    111     if (shouldDepthLevelCache) {
    112         traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
    113     }
    114     if (DEBUG_CACHE) {
    115         AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
    116                 shouldDepthLevelCache, inputSize);
    117     }
    118     while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
    119         DicNode dicNode;
    120         traverseSession->getDicTraverseCache()->popActive(&dicNode);
    121         if (dicNode.isTotalInputSizeExceedingLimit()) {
    122             return;
    123         }
    124         childDicNodes.clear();
    125         const int point0Index = dicNode.getInputIndex(0);
    126         const bool canDoLookAheadCorrection =
    127                 TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
    128         const bool isLookAheadCorrection = canDoLookAheadCorrection
    129                 && traverseSession->getDicTraverseCache()->
    130                         isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
    131         const bool isCompletion = dicNode.isCompletion(inputSize);
    132 
    133         const bool shouldNodeLevelCache =
    134                 TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
    135         if (shouldDepthLevelCache || shouldNodeLevelCache) {
    136             if (DEBUG_CACHE) {
    137                 dicNode.dump("PUSH_CACHE");
    138             }
    139             traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
    140             dicNode.setCached();
    141         }
    142 
    143         if (dicNode.isInDigraph()) {
    144             // Finish digraph handling if the node is in the middle of a digraph expansion.
    145             processDicNodeAsDigraph(traverseSession, &dicNode);
    146         } else if (isLookAheadCorrection) {
    147             // The algorithm maintains a small set of "deferred" nodes that have not consumed the
    148             // latest touch point yet. These are needed to apply look-ahead correction operations
    149             // that require special handling of the latest touch point. For example, with insertions
    150             // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all.
    151             processDicNodeAsTransposition(traverseSession, &dicNode);
    152             processDicNodeAsInsertion(traverseSession, &dicNode);
    153         } else { // !isLookAheadCorrection
    154             // Only consider typing error corrections if the normalized compound distance is
    155             // below a spatial distance threshold.
    156             // NOTE: the threshold may need to be updated if scoring model changes.
    157             // TODO: Remove. Do not prune node here.
    158             const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
    159             // Process for handling space substitution (e.g., hevis => he is)
    160             if (allowsErrorCorrections
    161                     && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
    162                 createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
    163             }
    164 
    165             DicNodeUtils::getAllChildDicNodes(
    166                     &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
    167 
    168             const int childDicNodesSize = childDicNodes.getSizeAndLock();
    169             for (int i = 0; i < childDicNodesSize; ++i) {
    170                 DicNode *const childDicNode = childDicNodes[i];
    171                 if (isCompletion) {
    172                     // Handle forward lookahead when the lexicon letter exceeds the input size.
    173                     processDicNodeAsMatch(traverseSession, childDicNode);
    174                     continue;
    175                 }
    176                 if (DigraphUtils::hasDigraphForCodePoint(
    177                         traverseSession->getDictionaryStructurePolicy()
    178                                 ->getHeaderStructurePolicy(),
    179                         childDicNode->getNodeCodePoint())) {
    180                     correctionDicNode.initByCopy(childDicNode);
    181                     correctionDicNode.advanceDigraphIndex();
    182                     processDicNodeAsDigraph(traverseSession, &correctionDicNode);
    183                 }
    184                 if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
    185                         allowsErrorCorrections)) {
    186                     // TODO: (Gesture) Change weight between omission and substitution errors
    187                     // TODO: (Gesture) Terminal node should not be handled as omission
    188                     correctionDicNode.initByCopy(childDicNode);
    189                     processDicNodeAsOmission(traverseSession, &correctionDicNode);
    190                 }
    191                 const ProximityType proximityType = TRAVERSAL->getProximityType(
    192                         traverseSession, &dicNode, childDicNode);
    193                 switch (proximityType) {
    194                     // TODO: Consider the difference of proximityType here
    195                     case MATCH_CHAR:
    196                     case PROXIMITY_CHAR:
    197                         processDicNodeAsMatch(traverseSession, childDicNode);
    198                         break;
    199                     case ADDITIONAL_PROXIMITY_CHAR:
    200                         if (allowsErrorCorrections) {
    201                             processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
    202                                     childDicNode);
    203                         }
    204                         break;
    205                     case SUBSTITUTION_CHAR:
    206                         if (allowsErrorCorrections) {
    207                             processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode);
    208                         }
    209                         break;
    210                     case UNRELATED_CHAR:
    211                         // Just drop this dicNode and do nothing.
    212                         break;
    213                     default:
    214                         // Just drop this dicNode and do nothing.
    215                         break;
    216                 }
    217             }
    218 
    219             // Push the dicNode for look-ahead correction
    220             if (allowsErrorCorrections && canDoLookAheadCorrection) {
    221                 traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
    222             }
    223         }
    224     }
    225 }
    226 
    227 void Suggest::processTerminalDicNode(
    228         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    229     if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    230         return;
    231     }
    232     if (!dicNode->isTerminalDicNode()) {
    233         return;
    234     }
    235     if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
    236         return;
    237     }
    238     if (!dicNode->hasMatchedOrProximityCodePoints()) {
    239         return;
    240     }
    241     // Create a non-cached node here.
    242     DicNode terminalDicNode(*dicNode);
    243     if (TRAVERSAL->needsToTraverseAllUserInput()
    244             && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
    245         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
    246                 &terminalDicNode, traverseSession->getMultiBigramMap());
    247     }
    248     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0,
    249             &terminalDicNode, traverseSession->getMultiBigramMap());
    250     traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
    251 }
    252 
    253 /**
    254  * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
    255  * (by the space omission error correction) search path if input dicNode is on a terminal.
    256  */
    257 void Suggest::processExpandedDicNode(
    258         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    259     processTerminalDicNode(traverseSession, dicNode);
    260     if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    261         if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
    262             createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
    263         }
    264         const int allowsLookAhead = !(dicNode->hasMultipleWords()
    265                 && dicNode->isCompletion(traverseSession->getInputSize()));
    266         if (dicNode->hasChildren() && allowsLookAhead) {
    267             traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
    268         }
    269     }
    270 }
    271 
    272 void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
    273         DicNode *childDicNode) const {
    274     weightChildNode(traverseSession, childDicNode);
    275     processExpandedDicNode(traverseSession, childDicNode);
    276 }
    277 
    278 void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
    279         DicNode *dicNode, DicNode *childDicNode) const {
    280     // Note: Most types of corrections don't need to look up the bigram information since they do
    281     // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
    282     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
    283             traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
    284     weightChildNode(traverseSession, childDicNode);
    285     processExpandedDicNode(traverseSession, childDicNode);
    286 }
    287 
    288 void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
    289         DicNode *dicNode, DicNode *childDicNode) const {
    290     Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
    291             dicNode, childDicNode, 0 /* multiBigramMap */);
    292     weightChildNode(traverseSession, childDicNode);
    293     processExpandedDicNode(traverseSession, childDicNode);
    294 }
    295 
    296 // Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
    297 // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
    298 // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
    299 void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
    300         DicNode *childDicNode) const {
    301     weightChildNode(traverseSession, childDicNode);
    302     childDicNode->advanceDigraphIndex();
    303     processExpandedDicNode(traverseSession, childDicNode);
    304 }
    305 
    306 /**
    307  * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
    308  * matches for all possible next letters. Note that just skipping the current letter without any
    309  * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
    310  * the possible *next* letters after the omission to better limit search to plausible omissions.
    311  * Note that apostrophes are handled as omissions.
    312  */
    313 void Suggest::processDicNodeAsOmission(
    314         DicTraverseSession *traverseSession, DicNode *dicNode) const {
    315     DicNodeVector childDicNodes;
    316     DicNodeUtils::getAllChildDicNodes(
    317             dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
    318 
    319     const int size = childDicNodes.getSizeAndLock();
    320     for (int i = 0; i < size; i++) {
    321         DicNode *const childDicNode = childDicNodes[i];
    322         // Treat this word as omission
    323         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
    324                 dicNode, childDicNode, 0 /* multiBigramMap */);
    325         weightChildNode(traverseSession, childDicNode);
    326         if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
    327             continue;
    328         }
    329         processExpandedDicNode(traverseSession, childDicNode);
    330     }
    331 }
    332 
    333 /**
    334  * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and
    335  * consider matches for the next touch point.
    336  */
    337 void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
    338         DicNode *dicNode) const {
    339     const int16_t pointIndex = dicNode->getInputIndex(0);
    340     DicNodeVector childDicNodes;
    341     DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
    342             &childDicNodes);
    343     const int size = childDicNodes.getSizeAndLock();
    344     for (int i = 0; i < size; i++) {
    345         if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
    346                 != childDicNodes[i]->getNodeCodePoint()) {
    347             continue;
    348         }
    349         DicNode *const childDicNode = childDicNodes[i];
    350         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
    351                 dicNode, childDicNode, 0 /* multiBigramMap */);
    352         processExpandedDicNode(traverseSession, childDicNode);
    353     }
    354 }
    355 
    356 /**
    357  * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points.
    358  */
    359 void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
    360         DicNode *dicNode) const {
    361     const int16_t pointIndex = dicNode->getInputIndex(0);
    362     DicNodeVector childDicNodes1;
    363     DicNodeVector childDicNodes2;
    364     DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
    365             &childDicNodes1);
    366     const int childSize1 = childDicNodes1.getSizeAndLock();
    367     for (int i = 0; i < childSize1; i++) {
    368         const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
    369                 ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
    370                         true /* checkProximityChars */);
    371         if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
    372             continue;
    373         }
    374         if (childDicNodes1[i]->hasChildren()) {
    375             childDicNodes2.clear();
    376             DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
    377                     traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
    378             const int childSize2 = childDicNodes2.getSizeAndLock();
    379             for (int j = 0; j < childSize2; j++) {
    380                 DicNode *const childDicNode2 = childDicNodes2[j];
    381                 const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
    382                         ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
    383                                 true /* checkProximityChars */);
    384                 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
    385                     continue;
    386                 }
    387                 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
    388                         traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
    389                 processExpandedDicNode(traverseSession, childDicNode2);
    390             }
    391         }
    392     }
    393 }
    394 
    395 /**
    396  * Weight child dicNode by aligning it to the key
    397  */
    398 void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
    399     const int inputSize = traverseSession->getInputSize();
    400     if (dicNode->isCompletion(inputSize)) {
    401         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession,
    402                 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    403     } else { // completion
    404         Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession,
    405                 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    406     }
    407 }
    408 
    409 /**
    410  * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
    411  * incorporates the unigram / bigram score for the ending word into the new dicNode.
    412  */
    413 void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
    414         const bool spaceSubstitution) const {
    415     if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
    416         return;
    417     }
    418 
    419     // Create a non-cached node here.
    420     DicNode newDicNode;
    421     DicNodeUtils::initAsRootWithPreviousWord(
    422             traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
    423     const CorrectionType correctionType = spaceSubstitution ?
    424             CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
    425     Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
    426             &newDicNode, traverseSession->getMultiBigramMap());
    427     if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
    428         // newDicNode is worth continuing to traverse.
    429         // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
    430         // here because here is not the right place to do pruning. Pruning should take place only
    431         // in DicNodePriorityQueue.
    432         traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
    433     }
    434 }
    435 } // namespace latinime
    436