Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <assert.h>
     18 #include <ctype.h>
     19 #include <math.h>
     20 #include <stdio.h>
     21 #include <string.h>
     22 
     23 #define LOG_TAG "LatinIME: correction.cpp"
     24 
     25 #include "char_utils.h"
     26 #include "correction.h"
     27 #include "defines.h"
     28 #include "dictionary.h"
     29 #include "proximity_info.h"
     30 
     31 namespace latinime {
     32 
     33 /////////////////////////////
     34 // edit distance funcitons //
     35 /////////////////////////////
     36 
     37 inline static void initEditDistance(int *editDistanceTable) {
     38     for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
     39         editDistanceTable[i] = i;
     40     }
     41 }
     42 
     43 inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
     44         const int editDistanceTableWidth, const int outputLength) {
     45     if (DEBUG_DICT) {
     46         AKLOGI("EditDistanceTable");
     47         for (int i = 0; i <= 10; ++i) {
     48             int c[11];
     49             for (int j = 0; j <= 10; ++j) {
     50                 if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
     51                     c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
     52                 } else {
     53                     c[j] = -1;
     54                 }
     55             }
     56             AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
     57                     c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
     58         }
     59     }
     60 }
     61 
     62 inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
     63         const int inputLength, const unsigned short *output, const int outputLength) {
     64     // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
     65     // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
     66     // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
     67     // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
     68     int *const current = editDistanceTable + outputLength * (inputLength + 1);
     69     const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
     70     const int *const prevprev =
     71             outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
     72     current[0] = outputLength;
     73     const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
     74     const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
     75     for (int i = 1; i <= inputLength; ++i) {
     76         const uint32_t ci = toBaseLowerCase(input[i - 1]);
     77         const uint16_t cost = (ci == co) ? 0 : 1;
     78         current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
     79         if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
     80             current[i] = min(current[i], prevprev[i - 2] + 1);
     81         }
     82     }
     83 }
     84 
     85 inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
     86         const int outputLength, const int inputLength) {
     87     if (DEBUG_EDIT_DISTANCE) {
     88         AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
     89     }
     90     return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
     91 }
     92 
     93 //////////////////////
     94 // inline functions //
     95 //////////////////////
     96 static const char QUOTE = '\'';
     97 
     98 inline bool Correction::isQuote(const unsigned short c) {
     99     const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
    100     return (c == QUOTE && userTypedChar != QUOTE);
    101 }
    102 
    103 ////////////////
    104 // Correction //
    105 ////////////////
    106 
    107 Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
    108         : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
    109     initEditDistance(mEditDistanceTable);
    110 }
    111 
    112 void Correction::resetCorrection() {
    113     mTotalTraverseCount = 0;
    114 }
    115 
    116 void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
    117         const int maxDepth) {
    118     mProximityInfo = pi;
    119     mInputLength = inputLength;
    120     mMaxDepth = maxDepth;
    121     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
    122     // TODO: This is not supposed to be required.  Check what's going wrong with
    123     // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
    124     initEditDistance(mEditDistanceTable);
    125 }
    126 
    127 void Correction::initCorrectionState(
    128         const int rootPos, const int childCount, const bool traverseAll) {
    129     latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
    130     // TODO: remove
    131     mCorrectionStates[0].mTransposedPos = mTransposedPos;
    132     mCorrectionStates[0].mExcessivePos = mExcessivePos;
    133     mCorrectionStates[0].mSkipPos = mSkipPos;
    134 }
    135 
    136 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
    137         const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
    138         const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
    139     // TODO: remove
    140     mTransposedPos = transposedPos;
    141     mExcessivePos = excessivePos;
    142     mSkipPos = skipPos;
    143     // TODO: remove
    144     mCorrectionStates[0].mTransposedPos = transposedPos;
    145     mCorrectionStates[0].mExcessivePos = excessivePos;
    146     mCorrectionStates[0].mSkipPos = skipPos;
    147 
    148     mSpaceProximityPos = spaceProximityPos;
    149     mMissingSpacePos = missingSpacePos;
    150     mUseFullEditDistance = useFullEditDistance;
    151     mDoAutoCompletion = doAutoCompletion;
    152     mMaxErrors = maxErrors;
    153 }
    154 
    155 void Correction::checkState() {
    156     if (DEBUG_DICT) {
    157         int inputCount = 0;
    158         if (mSkipPos >= 0) ++inputCount;
    159         if (mExcessivePos >= 0) ++inputCount;
    160         if (mTransposedPos >= 0) ++inputCount;
    161         // TODO: remove this assert
    162         assert(inputCount <= 1);
    163     }
    164 }
    165 
    166 int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
    167         const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
    168     return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
    169             wordCount, this, isSpaceProximity, word);
    170 }
    171 
    172 int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
    173     return getFinalProbabilityInternal(probability, word, wordLength, mInputLength);
    174 }
    175 
    176 int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
    177         int *wordLength, const int inputLength) {
    178     return getFinalProbabilityInternal(probability, word, wordLength, inputLength);
    179 }
    180 
    181 int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
    182         int *wordLength, const int inputLength) {
    183     const int outputIndex = mTerminalOutputIndex;
    184     const int inputIndex = mTerminalInputIndex;
    185     *wordLength = outputIndex + 1;
    186     if (outputIndex < MIN_SUGGEST_DEPTH) {
    187         return NOT_A_PROBABILITY;
    188     }
    189 
    190     *word = mWord;
    191     int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
    192             inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength);
    193     return finalProbability;
    194 }
    195 
    196 bool Correction::initProcessState(const int outputIndex) {
    197     if (mCorrectionStates[outputIndex].mChildCount <= 0) {
    198         return false;
    199     }
    200     mOutputIndex = outputIndex;
    201     --(mCorrectionStates[outputIndex].mChildCount);
    202     mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
    203     mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
    204 
    205     mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount;
    206     mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
    207     mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
    208     mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
    209     mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
    210     mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded;
    211 
    212     mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos;
    213     mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos;
    214     mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
    215 
    216     mMatching = false;
    217     mProximityMatching = false;
    218     mAdditionalProximityMatching = false;
    219     mTransposing = false;
    220     mExceeding = false;
    221     mSkipping = false;
    222 
    223     return true;
    224 }
    225 
    226 int Correction::goDownTree(
    227         const int parentIndex, const int childCount, const int firstChildPos) {
    228     mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
    229     mCorrectionStates[mOutputIndex].mChildCount = childCount;
    230     mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
    231     return mOutputIndex;
    232 }
    233 
    234 // TODO: remove
    235 int Correction::getInputIndex() {
    236     return mInputIndex;
    237 }
    238 
    239 void Correction::incrementInputIndex() {
    240     ++mInputIndex;
    241 }
    242 
    243 void Correction::incrementOutputIndex() {
    244     ++mOutputIndex;
    245     mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
    246     mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
    247     mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
    248     mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
    249     mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
    250 
    251     mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
    252     mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
    253     mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
    254     mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
    255     mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
    256 
    257     mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
    258     mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
    259     mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
    260 
    261     mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
    262 
    263     mCorrectionStates[mOutputIndex].mMatching = mMatching;
    264     mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
    265     mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
    266     mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
    267     mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
    268     mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
    269 }
    270 
    271 void Correction::startToTraverseAllNodes() {
    272     mNeedsToTraverseAllNodes = true;
    273 }
    274 
    275 bool Correction::needsToPrune() const {
    276     // TODO: use edit distance here
    277     return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
    278             // Allow one char longer word for missing character
    279             || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
    280 }
    281 
    282 void Correction::addCharToCurrentWord(const int32_t c) {
    283     mWord[mOutputIndex] = c;
    284     const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord();
    285     calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength,
    286             mWord, mOutputIndex + 1);
    287 }
    288 
    289 Correction::CorrectionType Correction::processSkipChar(
    290         const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    291     addCharToCurrentWord(c);
    292     mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    293     mTerminalOutputIndex = mOutputIndex;
    294     if (mNeedsToTraverseAllNodes && isTerminal) {
    295         incrementOutputIndex();
    296         return TRAVERSE_ALL_ON_TERMINAL;
    297     } else {
    298         incrementOutputIndex();
    299         return TRAVERSE_ALL_NOT_ON_TERMINAL;
    300     }
    301 }
    302 
    303 Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    304     // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
    305     mTerminalInputIndex = mInputIndex;
    306     mTerminalOutputIndex = mOutputIndex;
    307     return UNRELATED;
    308 }
    309 
    310 inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
    311     return type == ProximityInfo::EQUIVALENT_CHAR;
    312 }
    313 
    314 inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) {
    315     return type == ProximityInfo::EQUIVALENT_CHAR
    316             || type == ProximityInfo::NEAR_PROXIMITY_CHAR;
    317 }
    318 
    319 Correction::CorrectionType Correction::processCharAndCalcState(
    320         const int32_t c, const bool isTerminal) {
    321     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    322     if (correctionCount > mMaxErrors) {
    323         return processUnrelatedCorrectionType();
    324     }
    325 
    326     // TODO: Change the limit if we'll allow two or more corrections
    327     const bool noCorrectionsHappenedSoFar = correctionCount == 0;
    328     const bool canTryCorrection = noCorrectionsHappenedSoFar;
    329     int proximityIndex = 0;
    330     mDistances[mOutputIndex] = NOT_A_DISTANCE;
    331 
    332     // Skip checking this node
    333     if (mNeedsToTraverseAllNodes || isQuote(c)) {
    334         bool incremented = false;
    335         if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
    336             // TODO: Do not check the proximity if EditDistance exceeds the threshold
    337             const ProximityInfo::ProximityType matchId =
    338                     mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex);
    339             if (isEquivalentChar(matchId)) {
    340                 mLastCharExceeded = false;
    341                 --mExcessiveCount;
    342                 mDistances[mOutputIndex] =
    343                         mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
    344             } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
    345                 mLastCharExceeded = false;
    346                 --mExcessiveCount;
    347                 ++mProximityCount;
    348                 mDistances[mOutputIndex] =
    349                         mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    350             }
    351             if (!isQuote(c)) {
    352                 incrementInputIndex();
    353                 incremented = true;
    354             }
    355         }
    356         return processSkipChar(c, isTerminal, incremented);
    357     }
    358 
    359     // Check possible corrections.
    360     if (mExcessivePos >= 0) {
    361         if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
    362             mExcessivePos = mOutputIndex;
    363         }
    364         if (mExcessivePos < mInputLength - 1) {
    365             mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
    366         }
    367     }
    368 
    369     if (mSkipPos >= 0) {
    370         if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
    371             if (DEBUG_DICT) {
    372                 assert(mSkipPos == mOutputIndex - 1);
    373             }
    374             mSkipPos = mOutputIndex;
    375         }
    376         mSkipping = mSkipPos == mOutputIndex && canTryCorrection;
    377     }
    378 
    379     if (mTransposedPos >= 0) {
    380         if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
    381             mTransposedPos = mOutputIndex;
    382         }
    383         if (mTransposedPos < mInputLength - 1) {
    384             mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
    385         }
    386     }
    387 
    388     bool secondTransposing = false;
    389     if (mTransposedCount % 2 == 1) {
    390         if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    391             ++mTransposedCount;
    392             secondTransposing = true;
    393         } else if (mCorrectionStates[mOutputIndex].mExceeding) {
    394             --mTransposedCount;
    395             ++mExcessiveCount;
    396             --mExcessivePos;
    397             incrementInputIndex();
    398         } else {
    399             --mTransposedCount;
    400             if (DEBUG_CORRECTION
    401                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    402                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    403                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    404                 DUMP_WORD(mWord, mOutputIndex);
    405                 AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    406                         mTransposedCount, mExcessiveCount, c);
    407             }
    408             return processUnrelatedCorrectionType();
    409         }
    410     }
    411 
    412     // TODO: Change the limit if we'll allow two or more proximity chars with corrections
    413     // Work around: When the mMaxErrors is 1, we only allow just one error
    414     // including proximity correction.
    415     const bool checkProximityChars = (mMaxErrors > 1)
    416             ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
    417             : (noCorrectionsHappenedSoFar && mProximityCount == 0);
    418 
    419     ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
    420             ? ProximityInfo::EQUIVALENT_CHAR
    421             : mProximityInfo->getMatchedProximityId(
    422                     mInputIndex, c, checkProximityChars, &proximityIndex);
    423 
    424     if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
    425             || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    426         if (canTryCorrection && mOutputIndex > 0
    427                 && mCorrectionStates[mOutputIndex].mProximityMatching
    428                 && mCorrectionStates[mOutputIndex].mExceeding
    429                 && isEquivalentChar(mProximityInfo->getMatchedProximityId(
    430                         mInputIndex, mWord[mOutputIndex - 1], false))) {
    431             if (DEBUG_CORRECTION
    432                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    433                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    434                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    435                 AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
    436             }
    437             // Conversion p->e
    438             // Example:
    439             // wearth ->    earth
    440             // px     -> (E)mmmmm
    441             ++mExcessiveCount;
    442             --mProximityCount;
    443             mExcessivePos = mOutputIndex - 1;
    444             ++mInputIndex;
    445             // Here, we are doing something equivalent to matchedProximityCharId,
    446             // but we already know that "excessive char correction" just happened
    447             // so that we just need to check "mProximityCount == 0".
    448             matchedProximityCharId = mProximityInfo->getMatchedProximityId(
    449                     mInputIndex, c, mProximityCount == 0, &proximityIndex);
    450         }
    451     }
    452 
    453     if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
    454             || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    455         if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    456             mAdditionalProximityMatching = true;
    457         }
    458         // TODO: Optimize
    459         // As the current char turned out to be an unrelated char,
    460         // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
    461         // here refers to the previous state.
    462         if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
    463                 && !mCorrectionStates[mOutputIndex].mTransposing
    464                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    465                 && isEquivalentChar(mProximityInfo->getMatchedProximityId(
    466                         mInputIndex, mWord[mOutputIndex - 1], false))
    467                 && isEquivalentChar(
    468                         mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
    469             // Conversion t->e
    470             // Example:
    471             // occaisional -> occa   sional
    472             // mmmmttx     -> mmmm(E)mmmmmm
    473             mTransposedCount -= 2;
    474             ++mExcessiveCount;
    475             ++mInputIndex;
    476         } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0
    477                 && !mCorrectionStates[mOutputIndex].mTransposing
    478                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    479                 && isEquivalentChar(
    480                         mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    481             // Conversion t->s
    482             // Example:
    483             // chcolate -> chocolate
    484             // mmttx    -> mmsmmmmmm
    485             mTransposedCount -= 2;
    486             ++mSkippedCount;
    487             --mInputIndex;
    488         } else if (canTryCorrection && mInputIndex > 0
    489                 && mCorrectionStates[mOutputIndex].mProximityMatching
    490                 && mCorrectionStates[mOutputIndex].mSkipping
    491                 && isEquivalentChar(
    492                         mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    493             // Conversion p->s
    494             // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
    495             // proximity chars of "s", but it should rather be handled as a skipped char.
    496             ++mSkippedCount;
    497             --mProximityCount;
    498             return processSkipChar(c, isTerminal, false);
    499         } else if (mInputIndex - 1 < mInputLength
    500                 && mSkippedCount > 0
    501                 && mCorrectionStates[mOutputIndex].mSkipping
    502                 && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
    503                 && isProximityCharOrEquivalentChar(
    504                         mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
    505             // Conversion s->a
    506             incrementInputIndex();
    507             --mSkippedCount;
    508             mProximityMatching = true;
    509             ++mProximityCount;
    510             mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
    511         } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
    512                 && isEquivalentChar(
    513                         mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
    514             // 1.2. Excessive or transpose correction
    515             if (mTransposing) {
    516                 ++mTransposedCount;
    517             } else {
    518                 ++mExcessiveCount;
    519                 incrementInputIndex();
    520             }
    521             if (DEBUG_CORRECTION
    522                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    523                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    524                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    525                 DUMP_WORD(mWord, mOutputIndex);
    526                 if (mTransposing) {
    527                     AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    528                             mTransposedCount, mExcessiveCount, c);
    529                 } else {
    530                     AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    531                             mTransposedCount, mExcessiveCount, c);
    532                 }
    533             }
    534         } else if (mSkipping) {
    535             // 3. Skip correction
    536             ++mSkippedCount;
    537             if (DEBUG_CORRECTION
    538                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    539                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    540                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    541                 AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    542                         mTransposedCount, mExcessiveCount, c);
    543             }
    544             return processSkipChar(c, isTerminal, false);
    545         } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    546             // As a last resort, use additional proximity characters
    547             mProximityMatching = true;
    548             ++mProximityCount;
    549             mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
    550             if (DEBUG_CORRECTION
    551                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    552                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    553                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    554                 AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    555                         mTransposedCount, mExcessiveCount, c);
    556             }
    557         } else {
    558             if (DEBUG_CORRECTION
    559                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    560                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    561                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    562                 DUMP_WORD(mWord, mOutputIndex);
    563                 AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    564                         mTransposedCount, mExcessiveCount, c);
    565             }
    566             return processUnrelatedCorrectionType();
    567         }
    568     } else if (secondTransposing) {
    569         // If inputIndex is greater than mInputLength, that means there is no
    570         // proximity chars. So, we don't need to check proximity.
    571         mMatching = true;
    572     } else if (isEquivalentChar(matchedProximityCharId)) {
    573         mMatching = true;
    574         ++mEquivalentCharCount;
    575         mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
    576     } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
    577         mProximityMatching = true;
    578         ++mProximityCount;
    579         mDistances[mOutputIndex] =
    580                 mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    581         if (DEBUG_CORRECTION
    582                 && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    583                 && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    584                         || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    585             AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    586                     mTransposedCount, mExcessiveCount, c);
    587         }
    588     }
    589 
    590     addCharToCurrentWord(c);
    591 
    592     // 4. Last char excessive correction
    593     mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
    594             && mProximityCount == 0 && (mInputIndex == mInputLength - 2);
    595     const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
    596     if (mLastCharExceeded) {
    597         ++mExcessiveCount;
    598     }
    599 
    600     // Start traversing all nodes after the index exceeds the user typed length
    601     if (isSameAsUserTypedLength) {
    602         startToTraverseAllNodes();
    603     }
    604 
    605     const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
    606             mExceeding && mInputIndex == mInputLength - 2;
    607 
    608     // Finally, we are ready to go to the next character, the next "virtual node".
    609     // We should advance the input index.
    610     // We do this in this branch of the 'if traverseAllNodes' because we are still matching
    611     // characters to input; the other branch is not matching them but searching for
    612     // completions, this is why it does not have to do it.
    613     incrementInputIndex();
    614     // Also, the next char is one "virtual node" depth more than this char.
    615     incrementOutputIndex();
    616 
    617     if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
    618             || isSameAsUserTypedLength) && isTerminal) {
    619         mTerminalInputIndex = mInputIndex - 1;
    620         mTerminalOutputIndex = mOutputIndex - 1;
    621         if (DEBUG_CORRECTION
    622                 && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
    623                 && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    624             DUMP_WORD(mWord, mOutputIndex);
    625             AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    626                     mTransposedCount, mExcessiveCount, c);
    627         }
    628         return ON_TERMINAL;
    629     } else {
    630         mTerminalInputIndex = mInputIndex - 1;
    631         mTerminalOutputIndex = mOutputIndex - 1;
    632         return NOT_ON_TERMINAL;
    633     }
    634 }
    635 
    636 Correction::~Correction() {
    637 }
    638 
    639 inline static int getQuoteCount(const unsigned short* word, const int length) {
    640     int quoteCount = 0;
    641     for (int i = 0; i < length; ++i) {
    642         if(word[i] == '\'') {
    643             ++quoteCount;
    644         }
    645     }
    646     return quoteCount;
    647 }
    648 
    649 inline static bool isUpperCase(unsigned short c) {
    650     return isAsciiUpper(toBaseChar(c));
    651 }
    652 
    653 //////////////////////
    654 // RankingAlgorithm //
    655 //////////////////////
    656 
    657 /* static */
    658 int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
    659         const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction,
    660         const int inputLength) {
    661     const int excessivePos = correction->getExcessivePos();
    662     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    663     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
    664     const ProximityInfo *proximityInfo = correction->mProximityInfo;
    665     const int skippedCount = correction->mSkippedCount;
    666     const int transposedCount = correction->mTransposedCount / 2;
    667     const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
    668     const int proximityMatchedCount = correction->mProximityCount;
    669     const bool lastCharExceeded = correction->mLastCharExceeded;
    670     const bool useFullEditDistance = correction->mUseFullEditDistance;
    671     const int outputLength = outputIndex + 1;
    672     if (skippedCount >= inputLength || inputLength == 0) {
    673         return -1;
    674     }
    675 
    676     // TODO: find more robust way
    677     bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
    678             : (inputLength == inputIndex + 1);
    679 
    680     // TODO: use mExcessiveCount
    681     const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
    682 
    683     const unsigned short* word = correction->mWord;
    684     const bool skipped = skippedCount > 0;
    685 
    686     const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
    687             - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
    688 
    689     // TODO: Calculate edit distance for transposed and excessive
    690     int ed = 0;
    691     if (DEBUG_DICT_FULL) {
    692         dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
    693     }
    694     int adjustedProximityMatchedCount = proximityMatchedCount;
    695 
    696     int finalFreq = freq;
    697 
    698     if (DEBUG_CORRECTION_FREQ
    699             && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
    700         AKLOGI("FinalFreq0: %d", finalFreq);
    701     }
    702     // TODO: Optimize this.
    703     if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
    704         ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
    705                 inputLength) - transposedCount;
    706 
    707         const int matchWeight = powerIntCapped(typedLetterMultiplier,
    708                 max(inputLength, outputLength) - ed);
    709         multiplyIntCapped(matchWeight, &finalFreq);
    710 
    711         // TODO: Demote further if there are two or more excessive chars with longer user input?
    712         if (inputLength > outputLength) {
    713             multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
    714         }
    715 
    716         ed = max(0, ed - quoteDiffCount);
    717         adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
    718                 proximityMatchedCount);
    719         if (transposedCount <= 0) {
    720             if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
    721                 // Promote a word with just one skipped or excessive char
    722                 if (sameLength) {
    723                     multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
    724                             + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength,
    725                             &finalFreq);
    726                 } else {
    727                     multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    728                 }
    729             } else if (ed == 0) {
    730                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    731                 sameLength = true;
    732             }
    733         }
    734     } else {
    735         const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
    736         multiplyIntCapped(matchWeight, &finalFreq);
    737     }
    738 
    739     if (proximityInfo->getMatchedProximityId(0, word[0], true)
    740             == ProximityInfo::UNRELATED_CHAR) {
    741         multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
    742     }
    743 
    744     ///////////////////////////////////////////////
    745     // Promotion and Demotion for each correction
    746 
    747     // Demotion for a word with missing character
    748     if (skipped) {
    749         const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
    750                 * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
    751                 / (10 * inputLength
    752                         - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
    753         if (DEBUG_DICT_FULL) {
    754             AKLOGI("Demotion rate for missing character is %d.", demotionRate);
    755         }
    756         multiplyRate(demotionRate, &finalFreq);
    757     }
    758 
    759     // Demotion for a word with transposed character
    760     if (transposedCount > 0) multiplyRate(
    761             WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
    762 
    763     // Demotion for a word with excessive character
    764     if (excessiveCount > 0) {
    765         multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
    766         if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
    767             if (DEBUG_DICT_FULL) {
    768                 AKLOGI("Double excessive demotion");
    769             }
    770             // If an excessive character is not adjacent to the left char or the right char,
    771             // we will demote this word.
    772             multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
    773         }
    774     }
    775 
    776     const bool performTouchPositionCorrection =
    777             CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
    778                         && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
    779     // Score calibration by touch coordinates is being done only for pure-fat finger typing error
    780     // cases.
    781     int additionalProximityCount = 0;
    782     // TODO: Remove this constraint.
    783     if (performTouchPositionCorrection) {
    784         for (int i = 0; i < outputLength; ++i) {
    785             const int squaredDistance = correction->mDistances[i];
    786             if (i < adjustedProximityMatchedCount) {
    787                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    788             }
    789             if (squaredDistance >= 0) {
    790                 // Promote or demote the score according to the distance from the sweet spot
    791                 static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
    792                 static const float B = 1.0f;
    793                 static const float C = 0.5f;
    794                 static const float MIN = 0.3f;
    795                 static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
    796                 static const float R2 = HALF_SCORE_SQUARED_RADIUS;
    797                 const float x = (float)squaredDistance
    798                         / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
    799                 const float factor = max((x < R1)
    800                     ? (A * (R1 - x) + B * x) / R1
    801                     : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
    802                 // factor is piecewise linear function like:
    803                 // A -_                  .
    804                 //     ^-_               .
    805                 // B      \              .
    806                 //         \_            .
    807                 // C         ------------.
    808                 //                       .
    809                 // 0   R1 R2             .
    810                 multiplyRate((int)(factor * 100), &finalFreq);
    811             } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
    812                 multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    813             } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
    814                 ++additionalProximityCount;
    815                 multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    816             }
    817         }
    818     } else {
    819         // Demote additional proximity characters
    820         for (int i = 0; i < outputLength; ++i) {
    821             const int squaredDistance = correction->mDistances[i];
    822             if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
    823                 ++additionalProximityCount;
    824             }
    825         }
    826         // Promotion for a word with proximity characters
    827         for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
    828             // A word with proximity corrections
    829             if (DEBUG_DICT_FULL) {
    830                 AKLOGI("Found a proximity correction.");
    831             }
    832             multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    833             if (i < additionalProximityCount) {
    834                 multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    835             } else {
    836                 multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    837             }
    838         }
    839     }
    840 
    841     // If the user types too many(three or more) proximity characters with additional proximity
    842     // character,do not treat as the same length word.
    843     if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3
    844             || transposedCount > 0 || skipped || excessiveCount > 0)) {
    845         sameLength = false;
    846     }
    847 
    848     const int errorCount = adjustedProximityMatchedCount > 0
    849             ? adjustedProximityMatchedCount
    850             : (proximityMatchedCount + transposedCount);
    851     multiplyRate(
    852             100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
    853 
    854     // Promotion for an exactly matched word
    855     if (ed == 0) {
    856         // Full exact match
    857         if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
    858                 && quoteDiffCount == 0 && additionalProximityCount == 0) {
    859             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
    860         }
    861     }
    862 
    863     // Promote a word with no correction
    864     if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0
    865             && additionalProximityCount == 0) {
    866         multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
    867     }
    868 
    869     // TODO: Check excessive count and transposed count
    870     // TODO: Remove this if possible
    871     /*
    872          If the last character of the user input word is the same as the next character
    873          of the output word, and also all of characters of the user input are matched
    874          to the output word, we'll promote that word a bit because
    875          that word can be considered the combination of skipped and matched characters.
    876          This means that the 'sm' pattern wins over the 'ma' pattern.
    877          e.g.)
    878          shel -> shell [mmmma] or [mmmsm]
    879          hel -> hello [mmmaa] or [mmsma]
    880          m ... matching
    881          s ... skipping
    882          a ... traversing all
    883          t ... transposing
    884          e ... exceeding
    885          p ... proximity matching
    886      */
    887     if (matchCount == inputLength && matchCount >= 2 && !skipped
    888             && word[matchCount] == word[matchCount - 1]) {
    889         multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
    890     }
    891 
    892     // TODO: Do not use sameLength?
    893     if (sameLength) {
    894         multiplyIntCapped(fullWordMultiplier, &finalFreq);
    895     }
    896 
    897     if (useFullEditDistance && outputLength > inputLength + 1) {
    898         const int diff = outputLength - inputLength - 1;
    899         const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
    900         finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
    901     }
    902 
    903     if (DEBUG_DICT_FULL) {
    904         AKLOGI("calc: %d, %d", outputLength, sameLength);
    905     }
    906 
    907     if (DEBUG_CORRECTION_FREQ
    908             && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
    909         DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength);
    910         DUMP_WORD(correction->mWord, outputLength);
    911         AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
    912                 skippedCount, transposedCount, excessiveCount, additionalProximityCount,
    913                 outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq);
    914     }
    915 
    916     return finalFreq;
    917 }
    918 
    919 /* static */
    920 int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
    921         const int *freqArray, const int *wordLengthArray, const int wordCount,
    922         const Correction* correction, const bool isSpaceProximity, const unsigned short *word) {
    923     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    924 
    925     bool firstCapitalizedWordDemotion = false;
    926     bool secondCapitalizedWordDemotion = false;
    927 
    928     {
    929         // TODO: Handle multiple capitalized word demotion properly
    930         const int firstWordLength = wordLengthArray[0];
    931         const int secondWordLength = wordLengthArray[1];
    932         if (firstWordLength >= 2) {
    933             firstCapitalizedWordDemotion = isUpperCase(word[0]);
    934         }
    935 
    936         if (secondWordLength >= 2) {
    937             // FIXME: word[firstWordLength + 1] is incorrect.
    938             secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
    939         }
    940     }
    941 
    942 
    943     const bool capitalizedWordDemotion =
    944             firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
    945 
    946     int totalLength = 0;
    947     int totalFreq = 0;
    948     for (int i = 0; i < wordCount; ++i){
    949         const int wordLength = wordLengthArray[i];
    950         if (wordLength <= 0) {
    951             return 0;
    952         }
    953         totalLength += wordLength;
    954         const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1);
    955         int tempFirstFreq = freqArray[i];
    956         multiplyRate(demotionRate, &tempFirstFreq);
    957         totalFreq += tempFirstFreq;
    958     }
    959 
    960     if (totalLength <= 0 || totalFreq <= 0) {
    961         return 0;
    962     }
    963 
    964     // TODO: Currently totalFreq is adjusted to two word metrix.
    965     // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
    966     // length.
    967     totalFreq = totalFreq * 2 / wordCount;
    968     if (wordCount > 2) {
    969         // Safety net for 3+ words -- Caveats: many heuristics and workarounds here.
    970         int oneLengthCounter = 0;
    971         int twoLengthCounter = 0;
    972         for (int i = 0; i < wordCount; ++i) {
    973             const int wordLength = wordLengthArray[i];
    974             // TODO: Use bigram instead of this safety net
    975             if (i < wordCount - 1) {
    976                 const int nextWordLength = wordLengthArray[i + 1];
    977                 if (wordLength == 1 && nextWordLength == 2) {
    978                     // Safety net to filter 1 length and 2 length sequential words
    979                     return 0;
    980                 }
    981             }
    982             const int freq = freqArray[i];
    983             // Demote too short weak words
    984             if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) {
    985                 multiplyRate(100 * freq / MAX_FREQ, &totalFreq);
    986             }
    987             if (wordLength == 1) {
    988                 ++oneLengthCounter;
    989             } else if (wordLength == 2) {
    990                 ++twoLengthCounter;
    991             }
    992             if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) {
    993                 // Safety net to filter too many short words
    994                 return 0;
    995             }
    996         }
    997         multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq);
    998     }
    999 
   1000     // This is a workaround to try offsetting the not-enough-demotion which will be done in
   1001     // calcNormalizedScore in Utils.java.
   1002     // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
   1003     // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
   1004     // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
   1005     const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
   1006     multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
   1007 
   1008     // At this moment, totalFreq is calculated by the following formula:
   1009     // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
   1010     //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
   1011 
   1012     multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
   1013 
   1014     // This is another workaround to offset the demotion which will be done in
   1015     // calcNormalizedScore in Utils.java.
   1016     // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
   1017     // the same amount because we already have adjusted the synthetic freq of this "missing or
   1018     // mistyped space" suggestion candidate above in this method.
   1019     const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
   1020     multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
   1021 
   1022     if (isSpaceProximity) {
   1023         // A word pair with one space proximity correction
   1024         if (DEBUG_DICT) {
   1025             AKLOGI("Found a word pair with space proximity correction.");
   1026         }
   1027         multiplyIntCapped(typedLetterMultiplier, &totalFreq);
   1028         multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
   1029     }
   1030 
   1031     if (isSpaceProximity) {
   1032         multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq);
   1033     } else {
   1034         multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
   1035     }
   1036 
   1037     if (capitalizedWordDemotion) {
   1038         multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
   1039     }
   1040 
   1041     if (DEBUG_CORRECTION_FREQ) {
   1042         AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1],
   1043                 wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq);
   1044         DUMP_WORD(word, wordLengthArray[0]);
   1045     }
   1046 
   1047     return totalFreq;
   1048 }
   1049 
   1050 /* Damerau-Levenshtein distance */
   1051 inline static int editDistanceInternal(
   1052         int* editDistanceTable, const unsigned short* before,
   1053         const int beforeLength, const unsigned short* after, const int afterLength) {
   1054     // dp[li][lo] dp[a][b] = dp[ a * lo + b]
   1055     int* dp = editDistanceTable;
   1056     const int li = beforeLength + 1;
   1057     const int lo = afterLength + 1;
   1058     for (int i = 0; i < li; ++i) {
   1059         dp[lo * i] = i;
   1060     }
   1061     for (int i = 0; i < lo; ++i) {
   1062         dp[i] = i;
   1063     }
   1064 
   1065     for (int i = 0; i < li - 1; ++i) {
   1066         for (int j = 0; j < lo - 1; ++j) {
   1067             const uint32_t ci = toBaseLowerCase(before[i]);
   1068             const uint32_t co = toBaseLowerCase(after[j]);
   1069             const uint16_t cost = (ci == co) ? 0 : 1;
   1070             dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
   1071                     min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
   1072             if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
   1073                     && co == toBaseLowerCase(before[i - 1])) {
   1074                 dp[(i + 1) * lo + (j + 1)] = min(
   1075                         dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
   1076             }
   1077         }
   1078     }
   1079 
   1080     if (DEBUG_EDIT_DISTANCE) {
   1081         AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
   1082         for (int i = 0; i < li; ++i) {
   1083             for (int j = 0; j < lo; ++j) {
   1084                 AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
   1085             }
   1086         }
   1087     }
   1088     return dp[li * lo - 1];
   1089 }
   1090 
   1091 int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
   1092         const int beforeLength, const unsigned short* after, const int afterLength) {
   1093     int table[(beforeLength + 1) * (afterLength + 1)];
   1094     return editDistanceInternal(table, before, beforeLength, after, afterLength);
   1095 }
   1096 
   1097 
   1098 // In dictionary.cpp, getSuggestion() method,
   1099 // suggestion scores are computed using the below formula.
   1100 // original score
   1101 //  := pow(mTypedLetterMultiplier (this is defined 2),
   1102 //         (the number of matched characters between typed word and suggested word))
   1103 //     * (individual word's score which defined in the unigram dictionary,
   1104 //         and this score is defined in range [0, 255].)
   1105 // Then, the following processing is applied.
   1106 //     - If the dictionary word is matched up to the point of the user entry
   1107 //       (full match up to min(before.length(), after.length())
   1108 //       => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
   1109 //     - If the word is a true full match except for differences in accents or
   1110 //       capitalization, then treat it as if the score was 255.
   1111 //     - If before.length() == after.length()
   1112 //       => multiply by mFullWordMultiplier (this is defined 2))
   1113 // So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
   1114 // For historical reasons we ignore the 1.2 modifier (because the measure for a good
   1115 // autocorrection threshold was done at a time when it didn't exist). This doesn't change
   1116 // the result.
   1117 // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
   1118 
   1119 /* static */
   1120 float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
   1121         const int beforeLength, const unsigned short* after, const int afterLength,
   1122         const int score) {
   1123     if (0 == beforeLength || 0 == afterLength) {
   1124         return 0;
   1125     }
   1126     const int distance = editDistance(before, beforeLength, after, afterLength);
   1127     int spaceCount = 0;
   1128     for (int i = 0; i < afterLength; ++i) {
   1129         if (after[i] == CODE_SPACE) {
   1130             ++spaceCount;
   1131         }
   1132     }
   1133 
   1134     if (spaceCount == afterLength) {
   1135         return 0;
   1136     }
   1137 
   1138     const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
   1139             * pow((float)TYPED_LETTER_MULTIPLIER,
   1140                     (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
   1141 
   1142     // add a weight based on edit distance.
   1143     // distance <= max(afterLength, beforeLength) == afterLength,
   1144     // so, 0 <= distance / afterLength <= 1
   1145     const float weight = 1.0 - (float) distance / afterLength;
   1146     return (score / maxScore) * weight;
   1147 }
   1148 
   1149 } // namespace latinime
   1150