Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <cassert>
     18 #include <cctype>
     19 #include <cmath>
     20 #include <cstring>
     21 
     22 #define LOG_TAG "LatinIME: correction.cpp"
     23 
     24 #include "char_utils.h"
     25 #include "correction.h"
     26 #include "defines.h"
     27 #include "proximity_info_state.h"
     28 
     29 namespace latinime {
     30 
     31 class ProximityInfo;
     32 
     33 /////////////////////////////
     34 // edit distance funcitons //
     35 /////////////////////////////
     36 
     37 inline static void initEditDistance(int *editDistanceTable) {
     38     for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
     39         editDistanceTable[i] = i;
     40     }
     41 }
     42 
     43 inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
     44         const int editDistanceTableWidth, const int outputLength) {
     45     if (DEBUG_DICT) {
     46         AKLOGI("EditDistanceTable");
     47         for (int i = 0; i <= 10; ++i) {
     48             int c[11];
     49             for (int j = 0; j <= 10; ++j) {
     50                 if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
     51                     c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
     52                 } else {
     53                     c[j] = -1;
     54                 }
     55             }
     56             AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
     57                     c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
     58             (void)c; // To suppress compiler warning
     59         }
     60     }
     61 }
     62 
     63 inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
     64         const int inputSize, const unsigned short *output, const int outputLength) {
     65     // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
     66     // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
     67     // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
     68     // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize].
     69     int *const current = editDistanceTable + outputLength * (inputSize + 1);
     70     const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
     71     const int *const prevprev =
     72             outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
     73     current[0] = outputLength;
     74     const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
     75     const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
     76     for (int i = 1; i <= inputSize; ++i) {
     77         const uint32_t ci = toBaseLowerCase(input[i - 1]);
     78         const uint16_t cost = (ci == co) ? 0 : 1;
     79         current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
     80         if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
     81             current[i] = min(current[i], prevprev[i - 2] + 1);
     82         }
     83     }
     84 }
     85 
     86 inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
     87         const int outputLength, const int inputSize) {
     88     if (DEBUG_EDIT_DISTANCE) {
     89         AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength);
     90     }
     91     return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
     92 }
     93 
     94 //////////////////////
     95 // inline functions //
     96 //////////////////////
     97 static const char SINGLE_QUOTE = '\'';
     98 
     99 inline bool Correction::isSingleQuote(const unsigned short c) {
    100     const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
    101     return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
    102 }
    103 
    104 ////////////////
    105 // Correction //
    106 ////////////////
    107 
    108 void Correction::resetCorrection() {
    109     mTotalTraverseCount = 0;
    110 }
    111 
    112 void Correction::initCorrection(const ProximityInfo *pi, const int inputSize,
    113         const int maxDepth) {
    114     mProximityInfo = pi;
    115     mInputSize = inputSize;
    116     mMaxDepth = maxDepth;
    117     mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2;
    118     // TODO: This is not supposed to be required.  Check what's going wrong with
    119     // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
    120     initEditDistance(mEditDistanceTable);
    121 }
    122 
    123 void Correction::initCorrectionState(
    124         const int rootPos, const int childCount, const bool traverseAll) {
    125     latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
    126     // TODO: remove
    127     mCorrectionStates[0].mTransposedPos = mTransposedPos;
    128     mCorrectionStates[0].mExcessivePos = mExcessivePos;
    129     mCorrectionStates[0].mSkipPos = mSkipPos;
    130 }
    131 
    132 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
    133         const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
    134         const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
    135     // TODO: remove
    136     mTransposedPos = transposedPos;
    137     mExcessivePos = excessivePos;
    138     mSkipPos = skipPos;
    139     // TODO: remove
    140     mCorrectionStates[0].mTransposedPos = transposedPos;
    141     mCorrectionStates[0].mExcessivePos = excessivePos;
    142     mCorrectionStates[0].mSkipPos = skipPos;
    143 
    144     mSpaceProximityPos = spaceProximityPos;
    145     mMissingSpacePos = missingSpacePos;
    146     mUseFullEditDistance = useFullEditDistance;
    147     mDoAutoCompletion = doAutoCompletion;
    148     mMaxErrors = maxErrors;
    149 }
    150 
    151 void Correction::checkState() {
    152     if (DEBUG_DICT) {
    153         int inputCount = 0;
    154         if (mSkipPos >= 0) ++inputCount;
    155         if (mExcessivePos >= 0) ++inputCount;
    156         if (mTransposedPos >= 0) ++inputCount;
    157     }
    158 }
    159 
    160 bool Correction::sameAsTyped() {
    161     return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
    162 }
    163 
    164 int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
    165         const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
    166     return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
    167             wordCount, this, isSpaceProximity, word);
    168 }
    169 
    170 int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
    171     return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
    172 }
    173 
    174 int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
    175         int *wordLength, const int inputSize) {
    176     return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
    177 }
    178 
    179 int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
    180         int *wordLength, const int inputSize) {
    181     const int outputIndex = mTerminalOutputIndex;
    182     const int inputIndex = mTerminalInputIndex;
    183     *wordLength = outputIndex + 1;
    184     *word = mWord;
    185     int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
    186             inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
    187     return finalProbability;
    188 }
    189 
    190 bool Correction::initProcessState(const int outputIndex) {
    191     if (mCorrectionStates[outputIndex].mChildCount <= 0) {
    192         return false;
    193     }
    194     mOutputIndex = outputIndex;
    195     --(mCorrectionStates[outputIndex].mChildCount);
    196     mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
    197     mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
    198 
    199     mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount;
    200     mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
    201     mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
    202     mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
    203     mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
    204     mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded;
    205 
    206     mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos;
    207     mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos;
    208     mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
    209 
    210     mMatching = false;
    211     mProximityMatching = false;
    212     mAdditionalProximityMatching = false;
    213     mTransposing = false;
    214     mExceeding = false;
    215     mSkipping = false;
    216 
    217     return true;
    218 }
    219 
    220 int Correction::goDownTree(
    221         const int parentIndex, const int childCount, const int firstChildPos) {
    222     mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
    223     mCorrectionStates[mOutputIndex].mChildCount = childCount;
    224     mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
    225     return mOutputIndex;
    226 }
    227 
    228 // TODO: remove
    229 int Correction::getInputIndex() const {
    230     return mInputIndex;
    231 }
    232 
    233 void Correction::incrementInputIndex() {
    234     ++mInputIndex;
    235 }
    236 
    237 void Correction::incrementOutputIndex() {
    238     ++mOutputIndex;
    239     mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
    240     mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
    241     mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
    242     mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
    243     mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
    244 
    245     mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
    246     mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
    247     mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
    248     mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
    249     mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
    250 
    251     mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
    252     mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
    253     mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
    254 
    255     mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
    256 
    257     mCorrectionStates[mOutputIndex].mMatching = mMatching;
    258     mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
    259     mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
    260     mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
    261     mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
    262     mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
    263 }
    264 
    265 void Correction::startToTraverseAllNodes() {
    266     mNeedsToTraverseAllNodes = true;
    267 }
    268 
    269 bool Correction::needsToPrune() const {
    270     // TODO: use edit distance here
    271     return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
    272             // Allow one char longer word for missing character
    273             || (!mDoAutoCompletion && (mOutputIndex > mInputSize));
    274 }
    275 
    276 void Correction::addCharToCurrentWord(const int32_t c) {
    277     mWord[mOutputIndex] = c;
    278     const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
    279     calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
    280             mWord, mOutputIndex + 1);
    281 }
    282 
    283 Correction::CorrectionType Correction::processSkipChar(
    284         const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    285     addCharToCurrentWord(c);
    286     mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    287     mTerminalOutputIndex = mOutputIndex;
    288     if (mNeedsToTraverseAllNodes && isTerminal) {
    289         incrementOutputIndex();
    290         return TRAVERSE_ALL_ON_TERMINAL;
    291     } else {
    292         incrementOutputIndex();
    293         return TRAVERSE_ALL_NOT_ON_TERMINAL;
    294     }
    295 }
    296 
    297 Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    298     // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
    299     mTerminalInputIndex = mInputIndex;
    300     mTerminalOutputIndex = mOutputIndex;
    301     return UNRELATED;
    302 }
    303 
    304 inline bool isEquivalentChar(ProximityType type) {
    305     return type == EQUIVALENT_CHAR;
    306 }
    307 
    308 inline bool isProximityCharOrEquivalentChar(ProximityType type) {
    309     return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
    310 }
    311 
    312 Correction::CorrectionType Correction::processCharAndCalcState(
    313         const int32_t c, const bool isTerminal) {
    314     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    315     if (correctionCount > mMaxErrors) {
    316         return processUnrelatedCorrectionType();
    317     }
    318 
    319     // TODO: Change the limit if we'll allow two or more corrections
    320     const bool noCorrectionsHappenedSoFar = correctionCount == 0;
    321     const bool canTryCorrection = noCorrectionsHappenedSoFar;
    322     int proximityIndex = 0;
    323     mDistances[mOutputIndex] = NOT_A_DISTANCE;
    324 
    325     // Skip checking this node
    326     if (mNeedsToTraverseAllNodes || isSingleQuote(c)) {
    327         bool incremented = false;
    328         if (mLastCharExceeded && mInputIndex == mInputSize - 1) {
    329             // TODO: Do not check the proximity if EditDistance exceeds the threshold
    330             const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
    331                     mInputIndex, c, true, &proximityIndex);
    332             if (isEquivalentChar(matchId)) {
    333                 mLastCharExceeded = false;
    334                 --mExcessiveCount;
    335                 mDistances[mOutputIndex] =
    336                         mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
    337             } else if (matchId == NEAR_PROXIMITY_CHAR) {
    338                 mLastCharExceeded = false;
    339                 --mExcessiveCount;
    340                 ++mProximityCount;
    341                 mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(
    342                         mInputIndex, proximityIndex);
    343             }
    344             if (!isSingleQuote(c)) {
    345                 incrementInputIndex();
    346                 incremented = true;
    347             }
    348         }
    349         return processSkipChar(c, isTerminal, incremented);
    350     }
    351 
    352     // Check possible corrections.
    353     if (mExcessivePos >= 0) {
    354         if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
    355             mExcessivePos = mOutputIndex;
    356         }
    357         if (mExcessivePos < mInputSize - 1) {
    358             mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
    359         }
    360     }
    361 
    362     if (mSkipPos >= 0) {
    363         if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
    364             if (DEBUG_DICT) {
    365                 // TODO: Enable this assertion.
    366                 //assert(mSkipPos == mOutputIndex - 1);
    367             }
    368             mSkipPos = mOutputIndex;
    369         }
    370         mSkipping = mSkipPos == mOutputIndex && canTryCorrection;
    371     }
    372 
    373     if (mTransposedPos >= 0) {
    374         if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
    375             mTransposedPos = mOutputIndex;
    376         }
    377         if (mTransposedPos < mInputSize - 1) {
    378             mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
    379         }
    380     }
    381 
    382     bool secondTransposing = false;
    383     if (mTransposedCount % 2 == 1) {
    384         if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
    385                 mInputIndex - 1, c, false))) {
    386             ++mTransposedCount;
    387             secondTransposing = true;
    388         } else if (mCorrectionStates[mOutputIndex].mExceeding) {
    389             --mTransposedCount;
    390             ++mExcessiveCount;
    391             --mExcessivePos;
    392             incrementInputIndex();
    393         } else {
    394             --mTransposedCount;
    395             if (DEBUG_CORRECTION
    396                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    397                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    398                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    399                 DUMP_WORD(mWord, mOutputIndex);
    400                 AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    401                         mTransposedCount, mExcessiveCount, c);
    402             }
    403             return processUnrelatedCorrectionType();
    404         }
    405     }
    406 
    407     // TODO: Change the limit if we'll allow two or more proximity chars with corrections
    408     // Work around: When the mMaxErrors is 1, we only allow just one error
    409     // including proximity correction.
    410     const bool checkProximityChars = (mMaxErrors > 1)
    411             ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
    412             : (noCorrectionsHappenedSoFar && mProximityCount == 0);
    413 
    414     ProximityType matchedProximityCharId = secondTransposing
    415             ? EQUIVALENT_CHAR
    416             : mProximityInfoState.getMatchedProximityId(
    417                     mInputIndex, c, checkProximityChars, &proximityIndex);
    418 
    419     if (UNRELATED_CHAR == matchedProximityCharId
    420             || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    421         if (canTryCorrection && mOutputIndex > 0
    422                 && mCorrectionStates[mOutputIndex].mProximityMatching
    423                 && mCorrectionStates[mOutputIndex].mExceeding
    424                 && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
    425                         mInputIndex, mWord[mOutputIndex - 1], false))) {
    426             if (DEBUG_CORRECTION
    427                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    428                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    429                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    430                 AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
    431             }
    432             // Conversion p->e
    433             // Example:
    434             // wearth ->    earth
    435             // px     -> (E)mmmmm
    436             ++mExcessiveCount;
    437             --mProximityCount;
    438             mExcessivePos = mOutputIndex - 1;
    439             ++mInputIndex;
    440             // Here, we are doing something equivalent to matchedProximityCharId,
    441             // but we already know that "excessive char correction" just happened
    442             // so that we just need to check "mProximityCount == 0".
    443             matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
    444                     mInputIndex, c, mProximityCount == 0, &proximityIndex);
    445         }
    446     }
    447 
    448     if (UNRELATED_CHAR == matchedProximityCharId
    449             || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    450         if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    451             mAdditionalProximityMatching = true;
    452         }
    453         // TODO: Optimize
    454         // As the current char turned out to be an unrelated char,
    455         // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
    456         // here refers to the previous state.
    457         if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0
    458                 && !mCorrectionStates[mOutputIndex].mTransposing
    459                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    460                 && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
    461                         mInputIndex, mWord[mOutputIndex - 1], false))
    462                 && isEquivalentChar(
    463                         mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
    464             // Conversion t->e
    465             // Example:
    466             // occaisional -> occa   sional
    467             // mmmmttx     -> mmmm(E)mmmmmm
    468             mTransposedCount -= 2;
    469             ++mExcessiveCount;
    470             ++mInputIndex;
    471         } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0
    472                 && !mCorrectionStates[mOutputIndex].mTransposing
    473                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    474                 && isEquivalentChar(
    475                         mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
    476             // Conversion t->s
    477             // Example:
    478             // chcolate -> chocolate
    479             // mmttx    -> mmsmmmmmm
    480             mTransposedCount -= 2;
    481             ++mSkippedCount;
    482             --mInputIndex;
    483         } else if (canTryCorrection && mInputIndex > 0
    484                 && mCorrectionStates[mOutputIndex].mProximityMatching
    485                 && mCorrectionStates[mOutputIndex].mSkipping
    486                 && isEquivalentChar(
    487                         mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
    488             // Conversion p->s
    489             // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
    490             // proximity chars of "s", but it should rather be handled as a skipped char.
    491             ++mSkippedCount;
    492             --mProximityCount;
    493             return processSkipChar(c, isTerminal, false);
    494         } else if (mInputIndex - 1 < mInputSize
    495                 && mSkippedCount > 0
    496                 && mCorrectionStates[mOutputIndex].mSkipping
    497                 && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
    498                 && isProximityCharOrEquivalentChar(
    499                         mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
    500             // Conversion s->a
    501             incrementInputIndex();
    502             --mSkippedCount;
    503             mProximityMatching = true;
    504             ++mProximityCount;
    505             mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
    506         } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize
    507                 && isEquivalentChar(
    508                         mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
    509             // 1.2. Excessive or transpose correction
    510             if (mTransposing) {
    511                 ++mTransposedCount;
    512             } else {
    513                 ++mExcessiveCount;
    514                 incrementInputIndex();
    515             }
    516             if (DEBUG_CORRECTION
    517                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    518                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    519                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    520                 DUMP_WORD(mWord, mOutputIndex);
    521                 if (mTransposing) {
    522                     AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    523                             mTransposedCount, mExcessiveCount, c);
    524                 } else {
    525                     AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    526                             mTransposedCount, mExcessiveCount, c);
    527                 }
    528             }
    529         } else if (mSkipping) {
    530             // 3. Skip correction
    531             ++mSkippedCount;
    532             if (DEBUG_CORRECTION
    533                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    534                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    535                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    536                 AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    537                         mTransposedCount, mExcessiveCount, c);
    538             }
    539             return processSkipChar(c, isTerminal, false);
    540         } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
    541             // As a last resort, use additional proximity characters
    542             mProximityMatching = true;
    543             ++mProximityCount;
    544             mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
    545             if (DEBUG_CORRECTION
    546                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    547                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    548                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    549                 AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    550                         mTransposedCount, mExcessiveCount, c);
    551             }
    552         } else {
    553             if (DEBUG_CORRECTION
    554                     && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    555                     && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    556                             || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    557                 DUMP_WORD(mWord, mOutputIndex);
    558                 AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    559                         mTransposedCount, mExcessiveCount, c);
    560             }
    561             return processUnrelatedCorrectionType();
    562         }
    563     } else if (secondTransposing) {
    564         // If inputIndex is greater than mInputSize, that means there is no
    565         // proximity chars. So, we don't need to check proximity.
    566         mMatching = true;
    567     } else if (isEquivalentChar(matchedProximityCharId)) {
    568         mMatching = true;
    569         ++mEquivalentCharCount;
    570         mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
    571     } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
    572         mProximityMatching = true;
    573         ++mProximityCount;
    574         mDistances[mOutputIndex] =
    575                 mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    576         if (DEBUG_CORRECTION
    577                 && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    578                 && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
    579                         || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    580             AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    581                     mTransposedCount, mExcessiveCount, c);
    582         }
    583     }
    584 
    585     addCharToCurrentWord(c);
    586 
    587     // 4. Last char excessive correction
    588     mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
    589             && mProximityCount == 0 && (mInputIndex == mInputSize - 2);
    590     const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded;
    591     if (mLastCharExceeded) {
    592         ++mExcessiveCount;
    593     }
    594 
    595     // Start traversing all nodes after the index exceeds the user typed length
    596     if (isSameAsUserTypedLength) {
    597         startToTraverseAllNodes();
    598     }
    599 
    600     const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
    601             mExceeding && mInputIndex == mInputSize - 2;
    602 
    603     // Finally, we are ready to go to the next character, the next "virtual node".
    604     // We should advance the input index.
    605     // We do this in this branch of the 'if traverseAllNodes' because we are still matching
    606     // characters to input; the other branch is not matching them but searching for
    607     // completions, this is why it does not have to do it.
    608     incrementInputIndex();
    609     // Also, the next char is one "virtual node" depth more than this char.
    610     incrementOutputIndex();
    611 
    612     if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
    613             || isSameAsUserTypedLength) && isTerminal) {
    614         mTerminalInputIndex = mInputIndex - 1;
    615         mTerminalOutputIndex = mOutputIndex - 1;
    616         if (DEBUG_CORRECTION
    617                 && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
    618                 && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
    619             DUMP_WORD(mWord, mOutputIndex);
    620             AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    621                     mTransposedCount, mExcessiveCount, c);
    622         }
    623         return ON_TERMINAL;
    624     } else {
    625         mTerminalInputIndex = mInputIndex - 1;
    626         mTerminalOutputIndex = mOutputIndex - 1;
    627         return NOT_ON_TERMINAL;
    628     }
    629 }
    630 
    631 inline static int getQuoteCount(const unsigned short *word, const int length) {
    632     int quoteCount = 0;
    633     for (int i = 0; i < length; ++i) {
    634         if (word[i] == SINGLE_QUOTE) {
    635             ++quoteCount;
    636         }
    637     }
    638     return quoteCount;
    639 }
    640 
    641 inline static bool isUpperCase(unsigned short c) {
    642     return isAsciiUpper(toBaseChar(c));
    643 }
    644 
    645 //////////////////////
    646 // RankingAlgorithm //
    647 //////////////////////
    648 
    649 /* static */
    650 int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
    651         const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction,
    652         const int inputSize) {
    653     const int excessivePos = correction->getExcessivePos();
    654     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    655     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
    656     const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState;
    657     const int skippedCount = correction->mSkippedCount;
    658     const int transposedCount = correction->mTransposedCount / 2;
    659     const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
    660     const int proximityMatchedCount = correction->mProximityCount;
    661     const bool lastCharExceeded = correction->mLastCharExceeded;
    662     const bool useFullEditDistance = correction->mUseFullEditDistance;
    663     const int outputLength = outputIndex + 1;
    664     if (skippedCount >= inputSize || inputSize == 0) {
    665         return -1;
    666     }
    667 
    668     // TODO: find more robust way
    669     bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2)
    670             : (inputSize == inputIndex + 1);
    671 
    672     // TODO: use mExcessiveCount
    673     const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
    674 
    675     const unsigned short *word = correction->mWord;
    676     const bool skipped = skippedCount > 0;
    677 
    678     const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
    679             - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize));
    680 
    681     // TODO: Calculate edit distance for transposed and excessive
    682     int ed = 0;
    683     if (DEBUG_DICT_FULL) {
    684         dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength);
    685     }
    686     int adjustedProximityMatchedCount = proximityMatchedCount;
    687 
    688     int finalFreq = freq;
    689 
    690     if (DEBUG_CORRECTION_FREQ
    691             && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
    692         AKLOGI("FinalFreq0: %d", finalFreq);
    693     }
    694     // TODO: Optimize this.
    695     if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
    696         ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength,
    697                 inputSize) - transposedCount;
    698 
    699         const int matchWeight = powerIntCapped(typedLetterMultiplier,
    700                 max(inputSize, outputLength) - ed);
    701         multiplyIntCapped(matchWeight, &finalFreq);
    702 
    703         // TODO: Demote further if there are two or more excessive chars with longer user input?
    704         if (inputSize > outputLength) {
    705             multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
    706         }
    707 
    708         ed = max(0, ed - quoteDiffCount);
    709         adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)),
    710                 proximityMatchedCount);
    711         if (transposedCount <= 0) {
    712             if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) {
    713                 // Promote a word with just one skipped or excessive char
    714                 if (sameLength) {
    715                     multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
    716                             + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength,
    717                             &finalFreq);
    718                 } else {
    719                     multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    720                 }
    721             } else if (ed == 0) {
    722                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    723                 sameLength = true;
    724             }
    725         }
    726     } else {
    727         const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
    728         multiplyIntCapped(matchWeight, &finalFreq);
    729     }
    730 
    731     if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
    732         multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
    733     }
    734 
    735     ///////////////////////////////////////////////
    736     // Promotion and Demotion for each correction
    737 
    738     // Demotion for a word with missing character
    739     if (skipped) {
    740         const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
    741                 * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
    742                 / (10 * inputSize
    743                         - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
    744         if (DEBUG_DICT_FULL) {
    745             AKLOGI("Demotion rate for missing character is %d.", demotionRate);
    746         }
    747         multiplyRate(demotionRate, &finalFreq);
    748     }
    749 
    750     // Demotion for a word with transposed character
    751     if (transposedCount > 0) multiplyRate(
    752             WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
    753 
    754     // Demotion for a word with excessive character
    755     if (excessiveCount > 0) {
    756         multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
    757         if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
    758             if (DEBUG_DICT_FULL) {
    759                 AKLOGI("Double excessive demotion");
    760             }
    761             // If an excessive character is not adjacent to the left char or the right char,
    762             // we will demote this word.
    763             multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
    764         }
    765     }
    766 
    767     int additionalProximityCount = 0;
    768     // Demote additional proximity characters
    769     for (int i = 0; i < outputLength; ++i) {
    770         const int squaredDistance = correction->mDistances[i];
    771         if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
    772             ++additionalProximityCount;
    773         }
    774     }
    775 
    776     const bool performTouchPositionCorrection =
    777             CALIBRATE_SCORE_BY_TOUCH_COORDINATES
    778                     && proximityInfoState->touchPositionCorrectionEnabled()
    779                     && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0
    780                     && additionalProximityCount == 0;
    781 
    782     // Score calibration by touch coordinates is being done only for pure-fat finger typing error
    783     // cases.
    784     // TODO: Remove this constraint.
    785     if (performTouchPositionCorrection) {
    786         for (int i = 0; i < outputLength; ++i) {
    787             const int squaredDistance = correction->mDistances[i];
    788             if (i < adjustedProximityMatchedCount) {
    789                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    790             }
    791             if (squaredDistance >= 0) {
    792                 // Promote or demote the score according to the distance from the sweet spot
    793                 static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
    794                 static const float B = 1.0f;
    795                 static const float C = 0.5f;
    796                 static const float MIN = 0.3f;
    797                 static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
    798                 static const float R2 = HALF_SCORE_SQUARED_RADIUS;
    799                 const float x = static_cast<float>(squaredDistance)
    800                         / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
    801                 const float factor = max((x < R1)
    802                         ? (A * (R1 - x) + B * x) / R1
    803                         : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
    804                 // factor is a piecewise linear function like:
    805                 // A -_                  .
    806                 //     ^-_               .
    807                 // B      \              .
    808                 //         \_            .
    809                 // C         ------------.
    810                 //                       .
    811                 // 0   R1 R2             .
    812                 multiplyRate((int)(factor * 100.0f), &finalFreq);
    813             } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
    814                 multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    815             }
    816         }
    817     } else {
    818         // Promotion for a word with proximity characters
    819         for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
    820             // A word with proximity corrections
    821             if (DEBUG_DICT_FULL) {
    822                 AKLOGI("Found a proximity correction.");
    823             }
    824             multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    825             if (i < additionalProximityCount) {
    826                 multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    827             } else {
    828                 multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    829             }
    830         }
    831     }
    832 
    833     // If the user types too many(three or more) proximity characters with additional proximity
    834     // character,do not treat as the same length word.
    835     if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3
    836             || transposedCount > 0 || skipped || excessiveCount > 0)) {
    837         sameLength = false;
    838     }
    839 
    840     const int errorCount = adjustedProximityMatchedCount > 0
    841             ? adjustedProximityMatchedCount
    842             : (proximityMatchedCount + transposedCount);
    843     multiplyRate(
    844             100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq);
    845 
    846     // Promotion for an exactly matched word
    847     if (ed == 0) {
    848         // Full exact match
    849         if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
    850                 && quoteDiffCount == 0 && additionalProximityCount == 0) {
    851             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
    852         }
    853     }
    854 
    855     // Promote a word with no correction
    856     if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0
    857             && additionalProximityCount == 0) {
    858         multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
    859     }
    860 
    861     // TODO: Check excessive count and transposed count
    862     // TODO: Remove this if possible
    863     /*
    864          If the last character of the user input word is the same as the next character
    865          of the output word, and also all of characters of the user input are matched
    866          to the output word, we'll promote that word a bit because
    867          that word can be considered the combination of skipped and matched characters.
    868          This means that the 'sm' pattern wins over the 'ma' pattern.
    869          e.g.)
    870          shel -> shell [mmmma] or [mmmsm]
    871          hel -> hello [mmmaa] or [mmsma]
    872          m ... matching
    873          s ... skipping
    874          a ... traversing all
    875          t ... transposing
    876          e ... exceeding
    877          p ... proximity matching
    878      */
    879     if (matchCount == inputSize && matchCount >= 2 && !skipped
    880             && word[matchCount] == word[matchCount - 1]) {
    881         multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
    882     }
    883 
    884     // TODO: Do not use sameLength?
    885     if (sameLength) {
    886         multiplyIntCapped(fullWordMultiplier, &finalFreq);
    887     }
    888 
    889     if (useFullEditDistance && outputLength > inputSize + 1) {
    890         const int diff = outputLength - inputSize - 1;
    891         const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
    892         finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
    893     }
    894 
    895     if (DEBUG_DICT_FULL) {
    896         AKLOGI("calc: %d, %d", outputLength, sameLength);
    897     }
    898 
    899     if (DEBUG_CORRECTION_FREQ
    900             && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
    901         DUMP_WORD(correction->getPrimaryInputWord(), inputSize);
    902         DUMP_WORD(correction->mWord, outputLength);
    903         AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
    904                 skippedCount, transposedCount, excessiveCount, additionalProximityCount,
    905                 outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq);
    906     }
    907 
    908     return finalFreq;
    909 }
    910 
    911 /* static */
    912 int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
    913         const int *freqArray, const int *wordLengthArray, const int wordCount,
    914         const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
    915     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    916 
    917     bool firstCapitalizedWordDemotion = false;
    918     bool secondCapitalizedWordDemotion = false;
    919 
    920     {
    921         // TODO: Handle multiple capitalized word demotion properly
    922         const int firstWordLength = wordLengthArray[0];
    923         const int secondWordLength = wordLengthArray[1];
    924         if (firstWordLength >= 2) {
    925             firstCapitalizedWordDemotion = isUpperCase(word[0]);
    926         }
    927 
    928         if (secondWordLength >= 2) {
    929             // FIXME: word[firstWordLength + 1] is incorrect.
    930             secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
    931         }
    932     }
    933 
    934 
    935     const bool capitalizedWordDemotion =
    936             firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
    937 
    938     int totalLength = 0;
    939     int totalFreq = 0;
    940     for (int i = 0; i < wordCount; ++i) {
    941         const int wordLength = wordLengthArray[i];
    942         if (wordLength <= 0) {
    943             return 0;
    944         }
    945         totalLength += wordLength;
    946         const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1);
    947         int tempFirstFreq = freqArray[i];
    948         multiplyRate(demotionRate, &tempFirstFreq);
    949         totalFreq += tempFirstFreq;
    950     }
    951 
    952     if (totalLength <= 0 || totalFreq <= 0) {
    953         return 0;
    954     }
    955 
    956     // TODO: Currently totalFreq is adjusted to two word metrix.
    957     // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
    958     // length.
    959     totalFreq = totalFreq * 2 / wordCount;
    960     if (wordCount > 2) {
    961         // Safety net for 3+ words -- Caveats: many heuristics and workarounds here.
    962         int oneLengthCounter = 0;
    963         int twoLengthCounter = 0;
    964         for (int i = 0; i < wordCount; ++i) {
    965             const int wordLength = wordLengthArray[i];
    966             // TODO: Use bigram instead of this safety net
    967             if (i < wordCount - 1) {
    968                 const int nextWordLength = wordLengthArray[i + 1];
    969                 if (wordLength == 1 && nextWordLength == 2) {
    970                     // Safety net to filter 1 length and 2 length sequential words
    971                     return 0;
    972                 }
    973             }
    974             const int freq = freqArray[i];
    975             // Demote too short weak words
    976             if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) {
    977                 multiplyRate(100 * freq / MAX_FREQ, &totalFreq);
    978             }
    979             if (wordLength == 1) {
    980                 ++oneLengthCounter;
    981             } else if (wordLength == 2) {
    982                 ++twoLengthCounter;
    983             }
    984             if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) {
    985                 // Safety net to filter too many short words
    986                 return 0;
    987             }
    988         }
    989         multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq);
    990     }
    991 
    992     // This is a workaround to try offsetting the not-enough-demotion which will be done in
    993     // calcNormalizedScore in Utils.java.
    994     // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
    995     // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
    996     // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
    997     const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
    998     multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
    999 
   1000     // At this moment, totalFreq is calculated by the following formula:
   1001     // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
   1002     //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
   1003 
   1004     multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
   1005 
   1006     // This is another workaround to offset the demotion which will be done in
   1007     // calcNormalizedScore in Utils.java.
   1008     // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
   1009     // the same amount because we already have adjusted the synthetic freq of this "missing or
   1010     // mistyped space" suggestion candidate above in this method.
   1011     const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
   1012     multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
   1013 
   1014     if (isSpaceProximity) {
   1015         // A word pair with one space proximity correction
   1016         if (DEBUG_DICT) {
   1017             AKLOGI("Found a word pair with space proximity correction.");
   1018         }
   1019         multiplyIntCapped(typedLetterMultiplier, &totalFreq);
   1020         multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
   1021     }
   1022 
   1023     if (isSpaceProximity) {
   1024         multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq);
   1025     } else {
   1026         multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
   1027     }
   1028 
   1029     if (capitalizedWordDemotion) {
   1030         multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
   1031     }
   1032 
   1033     if (DEBUG_CORRECTION_FREQ) {
   1034         AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1],
   1035                 wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq);
   1036         DUMP_WORD(word, wordLengthArray[0]);
   1037     }
   1038 
   1039     return totalFreq;
   1040 }
   1041 
   1042 /* Damerau-Levenshtein distance */
   1043 inline static int editDistanceInternal(
   1044         int *editDistanceTable, const unsigned short *before,
   1045         const int beforeLength, const unsigned short *after, const int afterLength) {
   1046     // dp[li][lo] dp[a][b] = dp[ a * lo + b]
   1047     int *dp = editDistanceTable;
   1048     const int li = beforeLength + 1;
   1049     const int lo = afterLength + 1;
   1050     for (int i = 0; i < li; ++i) {
   1051         dp[lo * i] = i;
   1052     }
   1053     for (int i = 0; i < lo; ++i) {
   1054         dp[i] = i;
   1055     }
   1056 
   1057     for (int i = 0; i < li - 1; ++i) {
   1058         for (int j = 0; j < lo - 1; ++j) {
   1059             const uint32_t ci = toBaseLowerCase(before[i]);
   1060             const uint32_t co = toBaseLowerCase(after[j]);
   1061             const uint16_t cost = (ci == co) ? 0 : 1;
   1062             dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
   1063                     min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
   1064             if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
   1065                     && co == toBaseLowerCase(before[i - 1])) {
   1066                 dp[(i + 1) * lo + (j + 1)] = min(
   1067                         dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
   1068             }
   1069         }
   1070     }
   1071 
   1072     if (DEBUG_EDIT_DISTANCE) {
   1073         AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
   1074         for (int i = 0; i < li; ++i) {
   1075             for (int j = 0; j < lo; ++j) {
   1076                 AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
   1077             }
   1078         }
   1079     }
   1080     return dp[li * lo - 1];
   1081 }
   1082 
   1083 int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
   1084         const int beforeLength, const unsigned short *after, const int afterLength) {
   1085     int table[(beforeLength + 1) * (afterLength + 1)];
   1086     return editDistanceInternal(table, before, beforeLength, after, afterLength);
   1087 }
   1088 
   1089 
   1090 // In dictionary.cpp, getSuggestion() method,
   1091 // suggestion scores are computed using the below formula.
   1092 // original score
   1093 //  := powf(mTypedLetterMultiplier (this is defined 2),
   1094 //         (the number of matched characters between typed word and suggested word))
   1095 //     * (individual word's score which defined in the unigram dictionary,
   1096 //         and this score is defined in range [0, 255].)
   1097 // Then, the following processing is applied.
   1098 //     - If the dictionary word is matched up to the point of the user entry
   1099 //       (full match up to min(before.length(), after.length())
   1100 //       => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
   1101 //     - If the word is a true full match except for differences in accents or
   1102 //       capitalization, then treat it as if the score was 255.
   1103 //     - If before.length() == after.length()
   1104 //       => multiply by mFullWordMultiplier (this is defined 2))
   1105 // So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
   1106 // For historical reasons we ignore the 1.2 modifier (because the measure for a good
   1107 // autocorrection threshold was done at a time when it didn't exist). This doesn't change
   1108 // the result.
   1109 // So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
   1110 
   1111 /* static */
   1112 float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
   1113         const int beforeLength, const unsigned short *after, const int afterLength,
   1114         const int score) {
   1115     if (0 == beforeLength || 0 == afterLength) {
   1116         return 0;
   1117     }
   1118     const int distance = editDistance(before, beforeLength, after, afterLength);
   1119     int spaceCount = 0;
   1120     for (int i = 0; i < afterLength; ++i) {
   1121         if (after[i] == CODE_SPACE) {
   1122             ++spaceCount;
   1123         }
   1124     }
   1125 
   1126     if (spaceCount == afterLength) {
   1127         return 0;
   1128     }
   1129 
   1130     const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
   1131             : static_cast<float>(MAX_INITIAL_SCORE)
   1132                     * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
   1133                             static_cast<float>(min(beforeLength, afterLength - spaceCount)))
   1134                     * static_cast<float>(FULL_WORD_MULTIPLIER);
   1135 
   1136     // add a weight based on edit distance.
   1137     // distance <= max(afterLength, beforeLength) == afterLength,
   1138     // so, 0 <= distance / afterLength <= 1
   1139     const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
   1140     return (static_cast<float>(score) / maxScore) * weight;
   1141 }
   1142 } // namespace latinime
   1143