Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <assert.h>
     18 #include <ctype.h>
     19 #include <stdio.h>
     20 #include <string.h>
     21 
     22 #define LOG_TAG "LatinIME: correction.cpp"
     23 
     24 #include "correction.h"
     25 #include "dictionary.h"
     26 #include "proximity_info.h"
     27 
     28 namespace latinime {
     29 
     30 //////////////////////
     31 // inline functions //
     32 //////////////////////
     33 static const char QUOTE = '\'';
     34 
     35 inline bool Correction::isQuote(const unsigned short c) {
     36     const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
     37     return (c == QUOTE && userTypedChar != QUOTE);
     38 }
     39 
     40 ////////////////
     41 // Correction //
     42 ////////////////
     43 
     44 Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
     45         : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
     46 }
     47 
     48 void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
     49         const int maxDepth) {
     50     mProximityInfo = pi;
     51     mInputLength = inputLength;
     52     mMaxDepth = maxDepth;
     53     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
     54 }
     55 
     56 void Correction::initCorrectionState(
     57         const int rootPos, const int childCount, const bool traverseAll) {
     58     latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
     59     // TODO: remove
     60     mCorrectionStates[0].mTransposedPos = mTransposedPos;
     61     mCorrectionStates[0].mExcessivePos = mExcessivePos;
     62     mCorrectionStates[0].mSkipPos = mSkipPos;
     63 }
     64 
     65 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
     66         const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
     67         const bool useFullEditDistance) {
     68     // TODO: remove
     69     mTransposedPos = transposedPos;
     70     mExcessivePos = excessivePos;
     71     mSkipPos = skipPos;
     72     // TODO: remove
     73     mCorrectionStates[0].mTransposedPos = transposedPos;
     74     mCorrectionStates[0].mExcessivePos = excessivePos;
     75     mCorrectionStates[0].mSkipPos = skipPos;
     76 
     77     mSpaceProximityPos = spaceProximityPos;
     78     mMissingSpacePos = missingSpacePos;
     79     mUseFullEditDistance = useFullEditDistance;
     80 }
     81 
     82 void Correction::checkState() {
     83     if (DEBUG_DICT) {
     84         int inputCount = 0;
     85         if (mSkipPos >= 0) ++inputCount;
     86         if (mExcessivePos >= 0) ++inputCount;
     87         if (mTransposedPos >= 0) ++inputCount;
     88         // TODO: remove this assert
     89         assert(inputCount <= 1);
     90     }
     91 }
     92 
     93 int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
     94         const unsigned short *word) {
     95     return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     96             firstFreq, secondFreq, this, word);
     97 }
     98 
     99 int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
    100     const int outputIndex = mTerminalOutputIndex;
    101     const int inputIndex = mTerminalInputIndex;
    102     *wordLength = outputIndex + 1;
    103     if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
    104         return -1;
    105     }
    106 
    107     *word = mWord;
    108     return Correction::RankingAlgorithm::calculateFinalFreq(
    109             inputIndex, outputIndex, freq, mEditDistanceTable, this);
    110 }
    111 
    112 bool Correction::initProcessState(const int outputIndex) {
    113     if (mCorrectionStates[outputIndex].mChildCount <= 0) {
    114         return false;
    115     }
    116     mOutputIndex = outputIndex;
    117     --(mCorrectionStates[outputIndex].mChildCount);
    118     mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
    119     mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
    120 
    121     mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount;
    122     mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
    123     mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
    124     mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
    125     mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
    126     mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded;
    127 
    128     mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos;
    129     mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos;
    130     mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
    131 
    132     mMatching = false;
    133     mProximityMatching = false;
    134     mTransposing = false;
    135     mExceeding = false;
    136     mSkipping = false;
    137 
    138     return true;
    139 }
    140 
    141 int Correction::goDownTree(
    142         const int parentIndex, const int childCount, const int firstChildPos) {
    143     mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
    144     mCorrectionStates[mOutputIndex].mChildCount = childCount;
    145     mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
    146     return mOutputIndex;
    147 }
    148 
    149 // TODO: remove
    150 int Correction::getOutputIndex() {
    151     return mOutputIndex;
    152 }
    153 
    154 // TODO: remove
    155 int Correction::getInputIndex() {
    156     return mInputIndex;
    157 }
    158 
    159 // TODO: remove
    160 bool Correction::needsToTraverseAllNodes() {
    161     return mNeedsToTraverseAllNodes;
    162 }
    163 
    164 void Correction::incrementInputIndex() {
    165     ++mInputIndex;
    166 }
    167 
    168 void Correction::incrementOutputIndex() {
    169     ++mOutputIndex;
    170     mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
    171     mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
    172     mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
    173     mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
    174     mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
    175 
    176     mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
    177     mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
    178     mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
    179     mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
    180     mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
    181 
    182     mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
    183     mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
    184     mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
    185 
    186     mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
    187 
    188     mCorrectionStates[mOutputIndex].mMatching = mMatching;
    189     mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
    190     mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
    191     mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
    192     mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
    193 }
    194 
    195 void Correction::startToTraverseAllNodes() {
    196     mNeedsToTraverseAllNodes = true;
    197 }
    198 
    199 bool Correction::needsToPrune() const {
    200     return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
    201 }
    202 
    203 // TODO: inline?
    204 Correction::CorrectionType Correction::processSkipChar(
    205         const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    206     mWord[mOutputIndex] = c;
    207     if (needsToTraverseAllNodes() && isTerminal) {
    208         mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    209         mTerminalOutputIndex = mOutputIndex;
    210         incrementOutputIndex();
    211         return TRAVERSE_ALL_ON_TERMINAL;
    212     } else {
    213         incrementOutputIndex();
    214         return TRAVERSE_ALL_NOT_ON_TERMINAL;
    215     }
    216 }
    217 
    218 inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
    219     return type == ProximityInfo::EQUIVALENT_CHAR;
    220 }
    221 
    222 Correction::CorrectionType Correction::processCharAndCalcState(
    223         const int32_t c, const bool isTerminal) {
    224     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    225     // TODO: Change the limit if we'll allow two or more corrections
    226     const bool noCorrectionsHappenedSoFar = correctionCount == 0;
    227     const bool canTryCorrection = noCorrectionsHappenedSoFar;
    228     int proximityIndex = 0;
    229     mDistances[mOutputIndex] = NOT_A_DISTANCE;
    230 
    231     if (mNeedsToTraverseAllNodes || isQuote(c)) {
    232         bool incremented = false;
    233         if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
    234             // TODO: Do not check the proximity if EditDistance exceeds the threshold
    235             const ProximityInfo::ProximityType matchId =
    236                     mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex);
    237             if (isEquivalentChar(matchId)) {
    238                 mLastCharExceeded = false;
    239                 --mExcessiveCount;
    240                 mDistances[mOutputIndex] =
    241                         mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
    242             } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
    243                 mLastCharExceeded = false;
    244                 --mExcessiveCount;
    245                 ++mProximityCount;
    246                 mDistances[mOutputIndex] =
    247                         mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    248             }
    249             incrementInputIndex();
    250             incremented = true;
    251         }
    252         return processSkipChar(c, isTerminal, incremented);
    253     }
    254 
    255     if (mExcessivePos >= 0) {
    256         if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
    257             mExcessivePos = mOutputIndex;
    258         }
    259         if (mExcessivePos < mInputLength - 1) {
    260             mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
    261         }
    262     }
    263 
    264     if (mSkipPos >= 0) {
    265         if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
    266             if (DEBUG_DICT) {
    267                 assert(mSkipPos == mOutputIndex - 1);
    268             }
    269             mSkipPos = mOutputIndex;
    270         }
    271         mSkipping = mSkipPos == mOutputIndex && canTryCorrection;
    272     }
    273 
    274     if (mTransposedPos >= 0) {
    275         if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
    276             mTransposedPos = mOutputIndex;
    277         }
    278         if (mTransposedPos < mInputLength - 1) {
    279             mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
    280         }
    281     }
    282 
    283     bool secondTransposing = false;
    284     if (mTransposedCount % 2 == 1) {
    285         if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    286             ++mTransposedCount;
    287             secondTransposing = true;
    288         } else if (mCorrectionStates[mOutputIndex].mExceeding) {
    289             --mTransposedCount;
    290             ++mExcessiveCount;
    291             --mExcessivePos;
    292             incrementInputIndex();
    293         } else {
    294             --mTransposedCount;
    295             if (DEBUG_CORRECTION) {
    296                 DUMP_WORD(mWord, mOutputIndex);
    297                 LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    298                         mTransposedCount, mExcessiveCount, c);
    299             }
    300             return UNRELATED;
    301         }
    302     }
    303 
    304     // TODO: Change the limit if we'll allow two or more proximity chars with corrections
    305     const bool checkProximityChars = noCorrectionsHappenedSoFar ||  mProximityCount == 0;
    306     ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
    307             ? ProximityInfo::EQUIVALENT_CHAR
    308             : mProximityInfo->getMatchedProximityId(
    309                     mInputIndex, c, checkProximityChars, &proximityIndex);
    310 
    311     if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
    312         if (canTryCorrection && mOutputIndex > 0
    313                 && mCorrectionStates[mOutputIndex].mProximityMatching
    314                 && mCorrectionStates[mOutputIndex].mExceeding
    315                 && isEquivalentChar(mProximityInfo->getMatchedProximityId(
    316                         mInputIndex, mWord[mOutputIndex - 1], false))) {
    317             if (DEBUG_CORRECTION) {
    318                 LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
    319             }
    320             // Conversion p->e
    321             // Example:
    322             // wearth ->    earth
    323             // px     -> (E)mmmmm
    324             ++mExcessiveCount;
    325             --mProximityCount;
    326             mExcessivePos = mOutputIndex - 1;
    327             ++mInputIndex;
    328             // Here, we are doing something equivalent to matchedProximityCharId,
    329             // but we already know that "excessive char correction" just happened
    330             // so that we just need to check "mProximityCount == 0".
    331             matchedProximityCharId = mProximityInfo->getMatchedProximityId(
    332                     mInputIndex, c, mProximityCount == 0, &proximityIndex);
    333         }
    334     }
    335 
    336     if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
    337         // TODO: Optimize
    338         // As the current char turned out to be an unrelated char,
    339         // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
    340         // here refers to the previous state.
    341         if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
    342                 && !mCorrectionStates[mOutputIndex].mTransposing
    343                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    344                 && isEquivalentChar(mProximityInfo->getMatchedProximityId(
    345                         mInputIndex, mWord[mOutputIndex - 1], false))
    346                 && isEquivalentChar(
    347                         mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
    348             // Conversion t->e
    349             // Example:
    350             // occaisional -> occa   sional
    351             // mmmmttx     -> mmmm(E)mmmmmm
    352             mTransposedCount -= 2;
    353             ++mExcessiveCount;
    354             ++mInputIndex;
    355         } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0
    356                 && !mCorrectionStates[mOutputIndex].mTransposing
    357                 && mCorrectionStates[mOutputIndex - 1].mTransposing
    358                 && isEquivalentChar(
    359                         mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    360             // Conversion t->s
    361             // Example:
    362             // chcolate -> chocolate
    363             // mmttx    -> mmsmmmmmm
    364             mTransposedCount -= 2;
    365             ++mSkippedCount;
    366             --mInputIndex;
    367         } else if (canTryCorrection && mInputIndex > 0
    368                 && mCorrectionStates[mOutputIndex].mProximityMatching
    369                 && mCorrectionStates[mOutputIndex].mSkipping
    370                 && isEquivalentChar(
    371                         mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
    372             // Conversion p->s
    373             // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
    374             // proximity chars of "s", but it should rather be handled as a skipped char.
    375             ++mSkippedCount;
    376             --mProximityCount;
    377             return processSkipChar(c, isTerminal, false);
    378         } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
    379                 && isEquivalentChar(
    380                         mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
    381             // 1.2. Excessive or transpose correction
    382             if (mTransposing) {
    383                 ++mTransposedCount;
    384             } else {
    385                 ++mExcessiveCount;
    386                 incrementInputIndex();
    387             }
    388         } else if (mSkipping) {
    389             // 3. Skip correction
    390             ++mSkippedCount;
    391             return processSkipChar(c, isTerminal, false);
    392         } else {
    393             if (DEBUG_CORRECTION) {
    394                 DUMP_WORD(mWord, mOutputIndex);
    395                 LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    396                         mTransposedCount, mExcessiveCount, c);
    397             }
    398             return UNRELATED;
    399         }
    400     } else if (secondTransposing) {
    401         // If inputIndex is greater than mInputLength, that means there is no
    402         // proximity chars. So, we don't need to check proximity.
    403         mMatching = true;
    404     } else if (isEquivalentChar(matchedProximityCharId)) {
    405         mMatching = true;
    406         ++mEquivalentCharCount;
    407         mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
    408     } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
    409         mProximityMatching = true;
    410         ++mProximityCount;
    411         mDistances[mOutputIndex] =
    412                 mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    413     }
    414 
    415     mWord[mOutputIndex] = c;
    416 
    417     // 4. Last char excessive correction
    418     mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
    419             && mProximityCount == 0 && (mInputIndex == mInputLength - 2);
    420     const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
    421     if (mLastCharExceeded) {
    422         ++mExcessiveCount;
    423     }
    424 
    425     // Start traversing all nodes after the index exceeds the user typed length
    426     if (isSameAsUserTypedLength) {
    427         startToTraverseAllNodes();
    428     }
    429 
    430     const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
    431             mExceeding && mInputIndex == mInputLength - 2;
    432 
    433     // Finally, we are ready to go to the next character, the next "virtual node".
    434     // We should advance the input index.
    435     // We do this in this branch of the 'if traverseAllNodes' because we are still matching
    436     // characters to input; the other branch is not matching them but searching for
    437     // completions, this is why it does not have to do it.
    438     incrementInputIndex();
    439     // Also, the next char is one "virtual node" depth more than this char.
    440     incrementOutputIndex();
    441 
    442     if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
    443             || isSameAsUserTypedLength) && isTerminal) {
    444         mTerminalInputIndex = mInputIndex - 1;
    445         mTerminalOutputIndex = mOutputIndex - 1;
    446         if (DEBUG_CORRECTION) {
    447             DUMP_WORD(mWord, mOutputIndex);
    448             LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
    449                     mTransposedCount, mExcessiveCount, c);
    450         }
    451         return ON_TERMINAL;
    452     } else {
    453         return NOT_ON_TERMINAL;
    454     }
    455 }
    456 
    457 Correction::~Correction() {
    458 }
    459 
    460 /////////////////////////
    461 // static inline utils //
    462 /////////////////////////
    463 
    464 static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
    465 static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
    466     return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
    467 }
    468 
    469 static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
    470 inline static void multiplyIntCapped(const int multiplier, int *base) {
    471     const int temp = *base;
    472     if (temp != S_INT_MAX) {
    473         // Branch if multiplier == 2 for the optimization
    474         if (multiplier == 2) {
    475             *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
    476         } else {
    477             // TODO: This overflow check gives a wrong answer when, for example,
    478             //       temp = 2^16 + 1 and multiplier = 2^17 + 1.
    479             //       Fix this behavior.
    480             const int tempRetval = temp * multiplier;
    481             *base = tempRetval >= temp ? tempRetval : S_INT_MAX;
    482         }
    483     }
    484 }
    485 
    486 inline static int powerIntCapped(const int base, const int n) {
    487     if (n <= 0) return 1;
    488     if (base == 2) {
    489         return n < 31 ? 1 << n : S_INT_MAX;
    490     } else {
    491         int ret = base;
    492         for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
    493         return ret;
    494     }
    495 }
    496 
    497 inline static void multiplyRate(const int rate, int *freq) {
    498     if (*freq != S_INT_MAX) {
    499         if (*freq > 1000000) {
    500             *freq /= 100;
    501             multiplyIntCapped(rate, freq);
    502         } else {
    503             multiplyIntCapped(rate, freq);
    504             *freq /= 100;
    505         }
    506     }
    507 }
    508 
    509 inline static int getQuoteCount(const unsigned short* word, const int length) {
    510     int quoteCount = 0;
    511     for (int i = 0; i < length; ++i) {
    512         if(word[i] == '\'') {
    513             ++quoteCount;
    514         }
    515     }
    516     return quoteCount;
    517 }
    518 
    519 inline static bool isUpperCase(unsigned short c) {
    520      if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
    521          c = BASE_CHARS[c];
    522      }
    523      if (isupper(c)) {
    524          return true;
    525      }
    526      return false;
    527 }
    528 
    529 /* static */
    530 inline static int editDistance(
    531         int* editDistanceTable, const unsigned short* input,
    532         const int inputLength, const unsigned short* output, const int outputLength) {
    533     // dp[li][lo] dp[a][b] = dp[ a * lo + b]
    534     int* dp = editDistanceTable;
    535     const int li = inputLength + 1;
    536     const int lo = outputLength + 1;
    537     for (int i = 0; i < li; ++i) {
    538         dp[lo * i] = i;
    539     }
    540     for (int i = 0; i < lo; ++i) {
    541         dp[i] = i;
    542     }
    543 
    544     for (int i = 0; i < li - 1; ++i) {
    545         for (int j = 0; j < lo - 1; ++j) {
    546             const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
    547             const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
    548             const uint16_t cost = (ci == co) ? 0 : 1;
    549             dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
    550                     min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
    551             if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
    552                     && co == Dictionary::toBaseLowerCase(input[i - 1])) {
    553                 dp[(i + 1) * lo + (j + 1)] = min(
    554                         dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
    555             }
    556         }
    557     }
    558 
    559     if (DEBUG_EDIT_DISTANCE) {
    560         LOGI("IN = %d, OUT = %d", inputLength, outputLength);
    561         for (int i = 0; i < li; ++i) {
    562             for (int j = 0; j < lo; ++j) {
    563                 LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
    564             }
    565         }
    566     }
    567     return dp[li * lo - 1];
    568 }
    569 
    570 //////////////////////
    571 // RankingAlgorithm //
    572 //////////////////////
    573 
    574 /* static */
    575 int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
    576         const int freq, int* editDistanceTable, const Correction* correction) {
    577     const int excessivePos = correction->getExcessivePos();
    578     const int inputLength = correction->mInputLength;
    579     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    580     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
    581     const ProximityInfo *proximityInfo = correction->mProximityInfo;
    582     const int skippedCount = correction->mSkippedCount;
    583     const int transposedCount = correction->mTransposedCount / 2;
    584     const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
    585     const int proximityMatchedCount = correction->mProximityCount;
    586     const bool lastCharExceeded = correction->mLastCharExceeded;
    587     const bool useFullEditDistance = correction->mUseFullEditDistance;
    588     const int outputLength = outputIndex + 1;
    589     if (skippedCount >= inputLength || inputLength == 0) {
    590         return -1;
    591     }
    592 
    593     // TODO: find more robust way
    594     bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
    595             : (inputLength == inputIndex + 1);
    596 
    597     // TODO: use mExcessiveCount
    598     const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
    599 
    600     const unsigned short* word = correction->mWord;
    601     const bool skipped = skippedCount > 0;
    602 
    603     const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
    604             - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
    605 
    606     // TODO: Calculate edit distance for transposed and excessive
    607     int ed = 0;
    608     int adjustedProximityMatchedCount = proximityMatchedCount;
    609 
    610     int finalFreq = freq;
    611 
    612     // TODO: Optimize this.
    613     // TODO: Ignoring edit distance for transposed char, for now
    614     if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
    615         const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
    616         ed = editDistance(editDistanceTable, primaryInputWord,
    617                 inputLength, word, outputIndex + 1);
    618         const int matchWeight = powerIntCapped(typedLetterMultiplier,
    619                 max(inputLength, outputIndex + 1) - ed);
    620         multiplyIntCapped(matchWeight, &finalFreq);
    621 
    622         // TODO: Demote further if there are two or more excessive chars with longer user input?
    623         if (inputLength > outputIndex + 1) {
    624             multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
    625         }
    626 
    627         ed = max(0, ed - quoteDiffCount);
    628 
    629         if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
    630             // Promote a word with just one skipped or excessive char
    631             if (sameLength) {
    632                 multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
    633             } else {
    634                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    635             }
    636         } else if (ed == 0) {
    637             multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    638             sameLength = true;
    639         }
    640         adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
    641                 proximityMatchedCount);
    642     } else {
    643         // TODO: Calculate the edit distance for transposed char
    644         const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
    645         multiplyIntCapped(matchWeight, &finalFreq);
    646     }
    647 
    648     if (proximityInfo->getMatchedProximityId(0, word[0], true)
    649             == ProximityInfo::UNRELATED_CHAR) {
    650         multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
    651     }
    652 
    653     ///////////////////////////////////////////////
    654     // Promotion and Demotion for each correction
    655 
    656     // Demotion for a word with missing character
    657     if (skipped) {
    658         const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
    659                 * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
    660                 / (10 * inputLength
    661                         - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
    662         if (DEBUG_DICT_FULL) {
    663             LOGI("Demotion rate for missing character is %d.", demotionRate);
    664         }
    665         multiplyRate(demotionRate, &finalFreq);
    666     }
    667 
    668     // Demotion for a word with transposed character
    669     if (transposedCount > 0) multiplyRate(
    670             WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
    671 
    672     // Demotion for a word with excessive character
    673     if (excessiveCount > 0) {
    674         multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
    675         if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
    676             if (DEBUG_CORRECTION_FREQ) {
    677                 LOGI("Double excessive demotion");
    678             }
    679             // If an excessive character is not adjacent to the left char or the right char,
    680             // we will demote this word.
    681             multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
    682         }
    683     }
    684 
    685     // Score calibration by touch coordinates is being done only for pure-fat finger typing error
    686     // cases.
    687     // TODO: Remove this constraint.
    688     if (CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
    689             && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0) {
    690         for (int i = 0; i < outputLength; ++i) {
    691             const int squaredDistance = correction->mDistances[i];
    692             if (i < adjustedProximityMatchedCount) {
    693                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    694             }
    695             if (squaredDistance >= 0) {
    696                 // Promote or demote the score according to the distance from the sweet spot
    697                 static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
    698                 static const float B = 1.0f;
    699                 static const float C = 0.5f;
    700                 static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
    701                 static const float R2 = HALF_SCORE_SQUARED_RADIUS;
    702                 const float x = (float)squaredDistance
    703                         / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
    704                 const float factor = (x < R1)
    705                     ? (A * (R1 - x) + B * x) / R1
    706                     : (B * (R2 - x) + C * (x - R1)) / (R2 - R1);
    707                 // factor is piecewise linear function like:
    708                 // A -_                  .
    709                 //     ^-_               .
    710                 // B      \              .
    711                 //         \             .
    712                 // C        \            .
    713                 //   0   R1 R2
    714                 multiplyRate((int)(factor * 100), &finalFreq);
    715             } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
    716                 multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    717             }
    718         }
    719     } else {
    720         // Promotion for a word with proximity characters
    721         for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
    722             // A word with proximity corrections
    723             if (DEBUG_DICT_FULL) {
    724                 LOGI("Found a proximity correction.");
    725             }
    726             multiplyIntCapped(typedLetterMultiplier, &finalFreq);
    727             multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
    728         }
    729     }
    730 
    731     const int errorCount = adjustedProximityMatchedCount > 0
    732             ? adjustedProximityMatchedCount
    733             : (proximityMatchedCount + transposedCount);
    734     multiplyRate(
    735             100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
    736 
    737     // Promotion for an exactly matched word
    738     if (ed == 0) {
    739         // Full exact match
    740         if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
    741             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
    742         }
    743     }
    744 
    745     // Promote a word with no correction
    746     if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) {
    747         multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
    748     }
    749 
    750     // TODO: Check excessive count and transposed count
    751     // TODO: Remove this if possible
    752     /*
    753          If the last character of the user input word is the same as the next character
    754          of the output word, and also all of characters of the user input are matched
    755          to the output word, we'll promote that word a bit because
    756          that word can be considered the combination of skipped and matched characters.
    757          This means that the 'sm' pattern wins over the 'ma' pattern.
    758          e.g.)
    759          shel -> shell [mmmma] or [mmmsm]
    760          hel -> hello [mmmaa] or [mmsma]
    761          m ... matching
    762          s ... skipping
    763          a ... traversing all
    764          t ... transposing
    765          e ... exceeding
    766          p ... proximity matching
    767      */
    768     if (matchCount == inputLength && matchCount >= 2 && !skipped
    769             && word[matchCount] == word[matchCount - 1]) {
    770         multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
    771     }
    772 
    773     // TODO: Do not use sameLength?
    774     if (sameLength) {
    775         multiplyIntCapped(fullWordMultiplier, &finalFreq);
    776     }
    777 
    778     if (useFullEditDistance && outputLength > inputLength + 1) {
    779         const int diff = outputLength - inputLength - 1;
    780         const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
    781         finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
    782     }
    783 
    784     if (DEBUG_DICT_FULL) {
    785         LOGI("calc: %d, %d", outputIndex, sameLength);
    786     }
    787 
    788     if (DEBUG_CORRECTION_FREQ) {
    789         DUMP_WORD(correction->mWord, outputIndex + 1);
    790         LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
    791                 skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
    792                 quoteDiffCount, ed, finalFreq);
    793     }
    794 
    795     return finalFreq;
    796 }
    797 
    798 /* static */
    799 int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
    800         const int firstFreq, const int secondFreq, const Correction* correction,
    801         const unsigned short *word) {
    802     const int spaceProximityPos = correction->mSpaceProximityPos;
    803     const int missingSpacePos = correction->mMissingSpacePos;
    804     if (DEBUG_DICT) {
    805         int inputCount = 0;
    806         if (spaceProximityPos >= 0) ++inputCount;
    807         if (missingSpacePos >= 0) ++inputCount;
    808         assert(inputCount <= 1);
    809     }
    810     const bool isSpaceProximity = spaceProximityPos >= 0;
    811     const int inputLength = correction->mInputLength;
    812     const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    813     const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
    814             : (inputLength - missingSpacePos);
    815     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
    816 
    817     bool firstCapitalizedWordDemotion = false;
    818     if (firstWordLength >= 2) {
    819         firstCapitalizedWordDemotion = isUpperCase(word[0]);
    820     }
    821 
    822     bool secondCapitalizedWordDemotion = false;
    823     if (secondWordLength >= 2) {
    824         secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
    825     }
    826 
    827     const bool capitalizedWordDemotion =
    828             firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
    829 
    830     if (DEBUG_DICT_FULL) {
    831         LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
    832     }
    833 
    834     if (firstWordLength == 0 || secondWordLength == 0) {
    835         return 0;
    836     }
    837     const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
    838     int tempFirstFreq = firstFreq;
    839     multiplyRate(firstDemotionRate, &tempFirstFreq);
    840 
    841     const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
    842     int tempSecondFreq = secondFreq;
    843     multiplyRate(secondDemotionRate, &tempSecondFreq);
    844 
    845     const int totalLength = firstWordLength + secondWordLength;
    846 
    847     // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
    848     // length.
    849     int totalFreq = tempFirstFreq + tempSecondFreq;
    850 
    851     // This is a workaround to try offsetting the not-enough-demotion which will be done in
    852     // calcNormalizedScore in Utils.java.
    853     // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
    854     // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
    855     // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
    856     const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
    857     multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
    858 
    859     // At this moment, totalFreq is calculated by the following formula:
    860     // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
    861     //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
    862 
    863     multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
    864 
    865     // This is another workaround to offset the demotion which will be done in
    866     // calcNormalizedScore in Utils.java.
    867     // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
    868     // the same amount because we already have adjusted the synthetic freq of this "missing or
    869     // mistyped space" suggestion candidate above in this method.
    870     const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
    871     multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
    872 
    873     if (isSpaceProximity) {
    874         // A word pair with one space proximity correction
    875         if (DEBUG_DICT) {
    876             LOGI("Found a word pair with space proximity correction.");
    877         }
    878         multiplyIntCapped(typedLetterMultiplier, &totalFreq);
    879         multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
    880     }
    881 
    882     multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
    883 
    884     if (capitalizedWordDemotion) {
    885         multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
    886     }
    887 
    888     return totalFreq;
    889 }
    890 
    891 } // namespace latinime
    892