1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "utils/autocorrection_threshold_utils.h" 18 19 #include <algorithm> 20 #include <cmath> 21 22 #include "defines.h" 23 #include "suggest/policyimpl/utils/edit_distance.h" 24 #include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h" 25 26 namespace latinime { 27 28 const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255; 29 const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2; 30 const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; 31 32 /* static */ int AutocorrectionThresholdUtils::editDistance(const int *before, 33 const int beforeLength, const int *after, const int afterLength) { 34 const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( 35 before, beforeLength, after, afterLength); 36 return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein)); 37 } 38 39 // In dictionary.cpp, getSuggestion() method, 40 // When USE_SUGGEST_INTERFACE_FOR_TYPING is true: 41 // 42 // // TODO: Revise the following logic thoroughly by referring to the logic 43 // // marked as "Otherwise" below. 44 // SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert 45 // them to integers. 46 // score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE) 47 // Undo the scaling here to recover the original score. 48 // normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE 49 // 50 // Otherwise: suggestion scores are computed using the below formula. 51 // original score 52 // := powf(mTypedLetterMultiplier (this is defined 2), 53 // (the number of matched characters between typed word and suggested word)) 54 // * (individual word's score which defined in the unigram dictionary, 55 // and this score is defined in range [0, 255].) 56 // Then, the following processing is applied. 57 // - If the dictionary word is matched up to the point of the user entry 58 // (full match up to min(before.length(), after.length()) 59 // => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) 60 // - If the word is a true full match except for differences in accents or 61 // capitalization, then treat it as if the score was 255. 62 // - If before.length() == after.length() 63 // => multiply by mFullWordMultiplier (this is defined 2)) 64 // So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 65 // For historical reasons we ignore the 1.2 modifier (because the measure for a good 66 // autocorrection threshold was done at a time when it didn't exist). This doesn't change 67 // the result. 68 // So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. 69 70 /* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before, 71 const int beforeLength, const int *after, const int afterLength, const int score) { 72 if (0 == beforeLength || 0 == afterLength) { 73 return 0.0f; 74 } 75 const int distance = editDistance(before, beforeLength, after, afterLength); 76 int spaceCount = 0; 77 for (int i = 0; i < afterLength; ++i) { 78 if (after[i] == KEYCODE_SPACE) { 79 ++spaceCount; 80 } 81 } 82 83 if (spaceCount == afterLength) { 84 return 0.0f; 85 } 86 87 if (score <= 0 || distance >= afterLength) { 88 // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0, 89 // or if the edit distance is larger than or equal to afterLength. 90 return 0.0f; 91 } 92 // add a weight based on edit distance. 93 const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); 94 95 // TODO: Revise the following logic thoroughly by referring to... 96 if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) { 97 return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight; 98 } 99 // ...this logic. 100 const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) 101 : static_cast<float>(MAX_INITIAL_SCORE) 102 * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), 103 static_cast<float>(std::min(beforeLength, afterLength - spaceCount))) 104 * static_cast<float>(FULL_WORD_MULTIPLIER); 105 106 return (static_cast<float>(score) / maxScore) * weight; 107 } 108 109 } // namespace latinime 110