1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/core/suggest.h" 18 19 #include "suggest/core/dicnode/dic_node.h" 20 #include "suggest/core/dicnode/dic_node_priority_queue.h" 21 #include "suggest/core/dicnode/dic_node_vector.h" 22 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" 23 #include "suggest/core/dictionary/dictionary.h" 24 #include "suggest/core/dictionary/digraph_utils.h" 25 #include "suggest/core/dictionary/shortcut_utils.h" 26 #include "suggest/core/layout/proximity_info.h" 27 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 28 #include "suggest/core/policy/scoring.h" 29 #include "suggest/core/policy/traversal.h" 30 #include "suggest/core/policy/weighting.h" 31 #include "suggest/core/session/dic_traverse_session.h" 32 33 namespace latinime { 34 35 // Initialization of class constants. 36 const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; 37 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; 38 const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; 39 40 /** 41 * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates 42 * whether to prematurely commit the suggested words up to the given point for sentence-level 43 * suggestion. 44 * 45 * Note: Currently does not support concurrent calls across threads. Continuous suggestion is 46 * automatically activated for sequential calls that share the same starting input. 47 * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. 48 */ 49 int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, 50 int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, 51 int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices, 52 int *outputTypes, int *outputAutoCommitFirstWordConfidence) const { 53 PROF_OPEN; 54 PROF_START(0); 55 const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); 56 DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); 57 tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, 58 pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); 59 // TODO: Add the way to evaluate cache 60 61 initializeSearch(tSession, commitPoint); 62 PROF_END(0); 63 PROF_START(1); 64 65 // keep expanding search dicNodes until all have terminated. 66 while (tSession->getDicTraverseCache()->activeSize() > 0) { 67 expandCurrentDicNodes(tSession); 68 tSession->getDicTraverseCache()->advanceActiveDicNodes(); 69 tSession->getDicTraverseCache()->advanceInputIndex(inputSize); 70 } 71 PROF_END(1); 72 PROF_START(2); 73 const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes, 74 outputAutoCommitFirstWordConfidence); 75 PROF_END(2); 76 PROF_CLOSE; 77 return size; 78 } 79 80 /** 81 * Initializes the search at the root of the lexicon trie. Note that when possible the search will 82 * continue suggestion from where it left off during the last call. 83 */ 84 void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const { 85 if (!traverseSession->getProximityInfoState(0)->isUsed()) { 86 return; 87 } 88 89 // Never auto partial commit for now. 90 commitPoint = 0; 91 92 if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE 93 && traverseSession->isContinuousSuggestionPossible()) { 94 if (commitPoint == 0) { 95 // Continue suggestion 96 traverseSession->getDicTraverseCache()->continueSearch(); 97 } else { 98 // Continue suggestion after partial commit. 99 DicNode *topDicNode = 100 traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); 101 traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); 102 traverseSession->getDicTraverseCache()->continueSearch(); 103 traverseSession->setPartiallyCommited(); 104 } 105 } else { 106 // Restart recognition at the root. 107 traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()), 108 MAX_RESULTS); 109 // Create a new dic node here 110 DicNode rootNode; 111 DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), 112 traverseSession->getPrevWordPos(), &rootNode); 113 traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); 114 } 115 } 116 117 /** 118 * Outputs the final list of suggestions (i.e., terminal nodes). 119 */ 120 int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, 121 int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, 122 int *outputAutoCommitFirstWordConfidence) const { 123 #if DEBUG_EVALUATE_MOST_PROBABLE_STRING 124 const int terminalSize = 0; 125 #else 126 const int terminalSize = min(MAX_RESULTS, 127 static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize())); 128 #endif 129 DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array 130 131 for (int index = terminalSize - 1; index >= 0; --index) { 132 traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]); 133 } 134 135 const float languageWeight = SCORING->getAdjustedLanguageWeight( 136 traverseSession, terminals, terminalSize); 137 138 int outputWordIndex = 0; 139 // Insert most probable word at index == 0 as long as there is one terminal at least 140 const bool hasMostProbableString = 141 SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, 142 &outputCodePoints[0], &outputTypes[0], &frequencies[0]); 143 if (hasMostProbableString) { 144 outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; 145 ++outputWordIndex; 146 } 147 148 // Initial value of the loop index for terminal nodes (words) 149 int doubleLetterTerminalIndex = -1; 150 DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER; 151 SCORING->searchWordWithDoubleLetter(terminals, terminalSize, 152 &doubleLetterTerminalIndex, &doubleLetterLevel); 153 154 int maxScore = S_INT_MIN; 155 // Force autocorrection for obvious long multi-word suggestions when the top suggestion is 156 // a long multiple words suggestion. 157 // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. 158 // traverseSession->isPartiallyCommited() always returns false because we never auto partial 159 // commit for now. 160 const bool forceCommitMultiWords = (terminalSize > 0) ? 161 TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop() 162 && (traverseSession->isPartiallyCommited() 163 || (traverseSession->getInputSize() 164 >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT 165 && terminals[0].hasMultipleWords())) : false; 166 // TODO: have partial commit work even with multiple pointers. 167 const bool outputSecondWordFirstLetterInputIndex = 168 traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); 169 if (terminalSize > 0) { 170 // If we have no suggestions, don't write this 171 outputAutoCommitFirstWordConfidence[0] = 172 computeFirstWordConfidence(&terminals[0]); 173 } 174 175 // Output suggestion results here 176 for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; 177 ++terminalIndex) { 178 DicNode *terminalDicNode = &terminals[terminalIndex]; 179 if (DEBUG_GEO_FULL) { 180 terminalDicNode->dump("OUT:"); 181 } 182 const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost( 183 terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); 184 const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) 185 + doubleLetterCost; 186 const bool isPossiblyOffensiveWord = 187 traverseSession->getDictionaryStructurePolicy()->getProbability( 188 terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0; 189 const bool isExactMatch = terminalDicNode->isExactMatch(); 190 const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); 191 // Heuristic: We exclude freq=0 first-char-uppercase words from exact match. 192 // (e.g. "AMD" and "and") 193 const bool isSafeExactMatch = isExactMatch 194 && !(isPossiblyOffensiveWord && isFirstCharUppercase); 195 const int outputTypeFlags = 196 (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) 197 | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); 198 199 // Entries that are blacklisted or do not represent a word should not be output. 200 const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); 201 202 // Increase output score of top typing suggestion to ensure autocorrection. 203 // TODO: Better integration with java side autocorrection logic. 204 const int finalScore = SCORING->calculateFinalScore( 205 compoundDistance, traverseSession->getInputSize(), 206 terminalDicNode->isExactMatch() 207 || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) 208 || (isValidWord && SCORING->doesAutoCorrectValidWord())); 209 if (maxScore < finalScore && isValidWord) { 210 maxScore = finalScore; 211 } 212 213 // Don't output invalid words. However, we still need to submit their shortcuts if any. 214 if (isValidWord) { 215 outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; 216 frequencies[outputWordIndex] = finalScore; 217 if (outputSecondWordFirstLetterInputIndex) { 218 outputIndicesToPartialCommit[outputWordIndex] = 219 terminalDicNode->getSecondWordFirstInputIndex( 220 traverseSession->getProximityInfoState(0)); 221 } else { 222 outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; 223 } 224 // Populate the outputChars array with the suggested word. 225 const int startIndex = outputWordIndex * MAX_WORD_LENGTH; 226 terminalDicNode->outputResult(&outputCodePoints[startIndex]); 227 ++outputWordIndex; 228 } 229 230 if (!terminalDicNode->hasMultipleWords()) { 231 BinaryDictionaryShortcutIterator shortcutIt( 232 traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), 233 traverseSession->getDictionaryStructurePolicy() 234 ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); 235 // Shortcut is not supported for multiple words suggestions. 236 // TODO: Check shortcuts during traversal for multiple words suggestions. 237 const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); 238 const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, 239 outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, 240 sameAsTyped); 241 const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( 242 traverseSession->getProximityInfoState(0)); 243 for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) { 244 if (outputSecondWordFirstLetterInputIndex) { 245 outputIndicesToPartialCommit[i] = secondWordFirstInputIndex; 246 } else { 247 outputIndicesToPartialCommit[i] = NOT_AN_INDEX; 248 } 249 } 250 outputWordIndex = updatedOutputWordIndex; 251 } 252 DicNode::managedDelete(terminalDicNode); 253 } 254 255 if (hasMostProbableString) { 256 SCORING->safetyNetForMostProbableString(terminalSize, maxScore, 257 &outputCodePoints[0], &frequencies[0]); 258 } 259 return outputWordIndex; 260 } 261 262 int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { 263 // Get the number of spaces in the first suggestion 264 const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); 265 // Get the number of characters in the first suggestion 266 const int length = terminalDicNode->getTotalNodeCodePointCount(); 267 // Get the distance for the first word of the suggestion 268 const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); 269 270 // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. 271 // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or 272 // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means 273 // we are very confident. 274 // Expected space count is 1 ~ 5 275 static const int MIN_EXPECTED_SPACE_COUNT = 1; 276 static const int MAX_EXPECTED_SPACE_COUNT = 5; 277 // Expected length is about 4 ~ 30 278 static const int MIN_EXPECTED_LENGTH = 4; 279 static const int MAX_EXPECTED_LENGTH = 30; 280 // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 281 static const float MIN_EXPECTED_DISTANCE = 0.0; 282 static const float MAX_EXPECTED_DISTANCE = 2.0; 283 // This is not strict: it's where most stuff will be falling, but it's still fine if it's 284 // outside these values. We want to output a value that reflects all of these. Each factor 285 // contributes a bit. 286 287 // We need at least a space. 288 if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; 289 290 // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0 291 // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the 292 // weight of the distance. Clamp to avoid overflows. 293 const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE 294 : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; 295 const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT 296 * (MAX_EXPECTED_DISTANCE - clampedDistance) 297 / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); 298 // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no 299 // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the 300 // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. 301 const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT 302 * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); 303 // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no 304 // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the 305 // weight of the space count. 306 const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT 307 * (spaceCount - MIN_EXPECTED_SPACE_COUNT) 308 / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); 309 310 return distanceContribution + lengthContribution + spaceContribution; 311 } 312 313 /** 314 * Expands the dicNodes in the current search priority queue by advancing to the possible child 315 * nodes based on the next touch point(s) (or no touch points for lookahead) 316 */ 317 void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { 318 const int inputSize = traverseSession->getInputSize(); 319 DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize()); 320 DicNode correctionDicNode; 321 322 // TODO: Find more efficient caching 323 const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession); 324 if (shouldDepthLevelCache) { 325 traverseSession->getDicTraverseCache()->updateLastCachedInputIndex(); 326 } 327 if (DEBUG_CACHE) { 328 AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d", 329 shouldDepthLevelCache, inputSize); 330 } 331 while (traverseSession->getDicTraverseCache()->activeSize() > 0) { 332 DicNode dicNode; 333 traverseSession->getDicTraverseCache()->popActive(&dicNode); 334 if (dicNode.isTotalInputSizeExceedingLimit()) { 335 return; 336 } 337 childDicNodes.clear(); 338 const int point0Index = dicNode.getInputIndex(0); 339 const bool canDoLookAheadCorrection = 340 TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode); 341 const bool isLookAheadCorrection = canDoLookAheadCorrection 342 && traverseSession->getDicTraverseCache()-> 343 isLookAheadCorrectionInputIndex(static_cast<int>(point0Index)); 344 const bool isCompletion = dicNode.isCompletion(inputSize); 345 346 const bool shouldNodeLevelCache = 347 TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode); 348 if (shouldDepthLevelCache || shouldNodeLevelCache) { 349 if (DEBUG_CACHE) { 350 dicNode.dump("PUSH_CACHE"); 351 } 352 traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode); 353 dicNode.setCached(); 354 } 355 356 if (dicNode.isInDigraph()) { 357 // Finish digraph handling if the node is in the middle of a digraph expansion. 358 processDicNodeAsDigraph(traverseSession, &dicNode); 359 } else if (isLookAheadCorrection) { 360 // The algorithm maintains a small set of "deferred" nodes that have not consumed the 361 // latest touch point yet. These are needed to apply look-ahead correction operations 362 // that require special handling of the latest touch point. For example, with insertions 363 // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all. 364 processDicNodeAsTransposition(traverseSession, &dicNode); 365 processDicNodeAsInsertion(traverseSession, &dicNode); 366 } else { // !isLookAheadCorrection 367 // Only consider typing error corrections if the normalized compound distance is 368 // below a spatial distance threshold. 369 // NOTE: the threshold may need to be updated if scoring model changes. 370 // TODO: Remove. Do not prune node here. 371 const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode); 372 // Process for handling space substitution (e.g., hevis => he is) 373 if (allowsErrorCorrections 374 && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) { 375 createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */); 376 } 377 378 DicNodeUtils::getAllChildDicNodes( 379 &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); 380 381 const int childDicNodesSize = childDicNodes.getSizeAndLock(); 382 for (int i = 0; i < childDicNodesSize; ++i) { 383 DicNode *const childDicNode = childDicNodes[i]; 384 if (isCompletion) { 385 // Handle forward lookahead when the lexicon letter exceeds the input size. 386 processDicNodeAsMatch(traverseSession, childDicNode); 387 continue; 388 } 389 if (DigraphUtils::hasDigraphForCodePoint( 390 traverseSession->getDictionaryStructurePolicy() 391 ->getHeaderStructurePolicy(), 392 childDicNode->getNodeCodePoint())) { 393 correctionDicNode.initByCopy(childDicNode); 394 correctionDicNode.advanceDigraphIndex(); 395 processDicNodeAsDigraph(traverseSession, &correctionDicNode); 396 } 397 if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode, 398 allowsErrorCorrections)) { 399 // TODO: (Gesture) Change weight between omission and substitution errors 400 // TODO: (Gesture) Terminal node should not be handled as omission 401 correctionDicNode.initByCopy(childDicNode); 402 processDicNodeAsOmission(traverseSession, &correctionDicNode); 403 } 404 const ProximityType proximityType = TRAVERSAL->getProximityType( 405 traverseSession, &dicNode, childDicNode); 406 switch (proximityType) { 407 // TODO: Consider the difference of proximityType here 408 case MATCH_CHAR: 409 case PROXIMITY_CHAR: 410 processDicNodeAsMatch(traverseSession, childDicNode); 411 break; 412 case ADDITIONAL_PROXIMITY_CHAR: 413 if (allowsErrorCorrections) { 414 processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode, 415 childDicNode); 416 } 417 break; 418 case SUBSTITUTION_CHAR: 419 if (allowsErrorCorrections) { 420 processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode); 421 } 422 break; 423 case UNRELATED_CHAR: 424 // Just drop this node and do nothing. 425 break; 426 default: 427 // Just drop this node and do nothing. 428 break; 429 } 430 } 431 432 // Push the node for look-ahead correction 433 if (allowsErrorCorrections && canDoLookAheadCorrection) { 434 traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); 435 } 436 } 437 } 438 } 439 440 void Suggest::processTerminalDicNode( 441 DicTraverseSession *traverseSession, DicNode *dicNode) const { 442 if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 443 return; 444 } 445 if (!dicNode->isTerminalWordNode()) { 446 return; 447 } 448 if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { 449 return; 450 } 451 // Create a non-cached node here. 452 DicNode terminalDicNode; 453 DicNodeUtils::initByCopy(dicNode, &terminalDicNode); 454 if (TRAVERSAL->needsToTraverseAllUserInput() 455 && dicNode->getInputIndex(0) < traverseSession->getInputSize()) { 456 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0, 457 &terminalDicNode, traverseSession->getMultiBigramMap()); 458 } 459 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0, 460 &terminalDicNode, traverseSession->getMultiBigramMap()); 461 traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode); 462 } 463 464 /** 465 * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word 466 * (by the space omission error correction) search path if input dicNode is on a terminal node. 467 */ 468 void Suggest::processExpandedDicNode( 469 DicTraverseSession *traverseSession, DicNode *dicNode) const { 470 processTerminalDicNode(traverseSession, dicNode); 471 if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 472 if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) { 473 createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */); 474 } 475 const int allowsLookAhead = !(dicNode->hasMultipleWords() 476 && dicNode->isCompletion(traverseSession->getInputSize())); 477 if (dicNode->hasChildren() && allowsLookAhead) { 478 traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode); 479 } 480 } 481 DicNode::managedDelete(dicNode); 482 } 483 484 void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession, 485 DicNode *childDicNode) const { 486 weightChildNode(traverseSession, childDicNode); 487 processExpandedDicNode(traverseSession, childDicNode); 488 } 489 490 void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, 491 DicNode *dicNode, DicNode *childDicNode) const { 492 // Note: Most types of corrections don't need to look up the bigram information since they do 493 // not treat the node as a terminal. There is no need to pass the bigram map in these cases. 494 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY, 495 traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */); 496 weightChildNode(traverseSession, childDicNode); 497 processExpandedDicNode(traverseSession, childDicNode); 498 } 499 500 void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, 501 DicNode *dicNode, DicNode *childDicNode) const { 502 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession, 503 dicNode, childDicNode, 0 /* multiBigramMap */); 504 weightChildNode(traverseSession, childDicNode); 505 processExpandedDicNode(traverseSession, childDicNode); 506 } 507 508 // Process the node codepoint as a digraph. This means that composite glyphs like the German 509 // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with 510 // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". 511 void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, 512 DicNode *childDicNode) const { 513 weightChildNode(traverseSession, childDicNode); 514 childDicNode->advanceDigraphIndex(); 515 processExpandedDicNode(traverseSession, childDicNode); 516 } 517 518 /** 519 * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider 520 * matches for all possible next letters. Note that just skipping the current letter without any 521 * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check 522 * the possible *next* letters after the omission to better limit search to plausible omissions. 523 * Note that apostrophes are handled as omissions. 524 */ 525 void Suggest::processDicNodeAsOmission( 526 DicTraverseSession *traverseSession, DicNode *dicNode) const { 527 DicNodeVector childDicNodes; 528 DicNodeUtils::getAllChildDicNodes( 529 dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); 530 531 const int size = childDicNodes.getSizeAndLock(); 532 for (int i = 0; i < size; i++) { 533 DicNode *const childDicNode = childDicNodes[i]; 534 // Treat this word as omission 535 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession, 536 dicNode, childDicNode, 0 /* multiBigramMap */); 537 weightChildNode(traverseSession, childDicNode); 538 if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) { 539 continue; 540 } 541 processExpandedDicNode(traverseSession, childDicNode); 542 } 543 } 544 545 /** 546 * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and 547 * consider matches for the next touch point. 548 */ 549 void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, 550 DicNode *dicNode) const { 551 const int16_t pointIndex = dicNode->getInputIndex(0); 552 DicNodeVector childDicNodes; 553 DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(), 554 &childDicNodes); 555 const int size = childDicNodes.getSizeAndLock(); 556 for (int i = 0; i < size; i++) { 557 if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1) 558 != childDicNodes[i]->getNodeCodePoint()) { 559 continue; 560 } 561 DicNode *const childDicNode = childDicNodes[i]; 562 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession, 563 dicNode, childDicNode, 0 /* multiBigramMap */); 564 processExpandedDicNode(traverseSession, childDicNode); 565 } 566 } 567 568 /** 569 * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points. 570 */ 571 void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, 572 DicNode *dicNode) const { 573 const int16_t pointIndex = dicNode->getInputIndex(0); 574 DicNodeVector childDicNodes1; 575 DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(), 576 &childDicNodes1); 577 const int childSize1 = childDicNodes1.getSizeAndLock(); 578 for (int i = 0; i < childSize1; i++) { 579 const ProximityType matchedId1 = traverseSession->getProximityInfoState(0) 580 ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(), 581 true /* checkProximityChars */); 582 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) { 583 continue; 584 } 585 if (childDicNodes1[i]->hasChildren()) { 586 DicNodeVector childDicNodes2; 587 DicNodeUtils::getAllChildDicNodes(childDicNodes1[i], 588 traverseSession->getDictionaryStructurePolicy(), &childDicNodes2); 589 const int childSize2 = childDicNodes2.getSizeAndLock(); 590 for (int j = 0; j < childSize2; j++) { 591 DicNode *const childDicNode2 = childDicNodes2[j]; 592 const ProximityType matchedId2 = traverseSession->getProximityInfoState(0) 593 ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(), 594 true /* checkProximityChars */); 595 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) { 596 continue; 597 } 598 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION, 599 traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */); 600 processExpandedDicNode(traverseSession, childDicNode2); 601 } 602 } 603 DicNode::managedDelete(childDicNodes1[i]); 604 } 605 } 606 607 /** 608 * Weight child node by aligning it to the key 609 */ 610 void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { 611 const int inputSize = traverseSession->getInputSize(); 612 if (dicNode->isCompletion(inputSize)) { 613 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession, 614 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 615 } else { // completion 616 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession, 617 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 618 } 619 } 620 621 /** 622 * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also 623 * incorporates the unigram / bigram score for the ending word into the new dicNode. 624 */ 625 void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, 626 const bool spaceSubstitution) const { 627 if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { 628 return; 629 } 630 631 // Create a non-cached node here. 632 DicNode newDicNode; 633 DicNodeUtils::initAsRootWithPreviousWord( 634 traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); 635 const CorrectionType correctionType = spaceSubstitution ? 636 CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION; 637 Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, 638 &newDicNode, traverseSession->getMultiBigramMap()); 639 if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 640 // newDicNode is worth continuing to traverse. 641 // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune 642 // here because here is not the right place to do pruning. Pruning should take place only 643 // in DicNodePriorityQueue. 644 traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode); 645 } 646 } 647 } // namespace latinime 648