1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_CORRECTION_H 18 #define LATINIME_CORRECTION_H 19 20 #include <stdint.h> 21 #include "correction_state.h" 22 23 #include "defines.h" 24 25 namespace latinime { 26 27 class ProximityInfo; 28 29 class Correction { 30 31 public: 32 typedef enum { 33 TRAVERSE_ALL_ON_TERMINAL, 34 TRAVERSE_ALL_NOT_ON_TERMINAL, 35 UNRELATED, 36 ON_TERMINAL, 37 NOT_ON_TERMINAL 38 } CorrectionType; 39 40 Correction(const int typedLetterMultiplier, const int fullWordMultiplier); 41 void initCorrection( 42 const ProximityInfo *pi, const int inputLength, const int maxWordLength); 43 void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); 44 45 // TODO: remove 46 void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, 47 const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); 48 void checkState(); 49 bool initProcessState(const int index); 50 51 int getOutputIndex(); 52 int getInputIndex(); 53 54 virtual ~Correction(); 55 int getSpaceProximityPos() const { 56 return mSpaceProximityPos; 57 } 58 int getMissingSpacePos() const { 59 return mMissingSpacePos; 60 } 61 62 int getSkipPos() const { 63 return mSkipPos; 64 } 65 66 int getExcessivePos() const { 67 return mExcessivePos; 68 } 69 70 int getTransposedPos() const { 71 return mTransposedPos; 72 } 73 74 bool needsToPrune() const; 75 76 int getFreqForSplitTwoWords( 77 const int firstFreq, const int secondFreq, const unsigned short *word); 78 int getFinalFreq(const int freq, unsigned short **word, int* wordLength); 79 80 CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); 81 82 ///////////////////////// 83 // Tree helper methods 84 int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); 85 86 inline int getTreeSiblingPos(const int index) const { 87 return mCorrectionStates[index].mSiblingPos; 88 } 89 90 inline void setTreeSiblingPos(const int index, const int pos) { 91 mCorrectionStates[index].mSiblingPos = pos; 92 } 93 94 inline int getTreeParentIndex(const int index) const { 95 return mCorrectionStates[index].mParentIndex; 96 } 97 private: 98 inline void incrementInputIndex(); 99 inline void incrementOutputIndex(); 100 inline bool needsToTraverseAllNodes(); 101 inline void startToTraverseAllNodes(); 102 inline bool isQuote(const unsigned short c); 103 inline CorrectionType processSkipChar( 104 const int32_t c, const bool isTerminal, const bool inputIndexIncremented); 105 106 const int TYPED_LETTER_MULTIPLIER; 107 const int FULL_WORD_MULTIPLIER; 108 const ProximityInfo *mProximityInfo; 109 110 bool mUseFullEditDistance; 111 int mMaxEditDistance; 112 int mMaxDepth; 113 int mInputLength; 114 int mSpaceProximityPos; 115 int mMissingSpacePos; 116 int mTerminalInputIndex; 117 int mTerminalOutputIndex; 118 119 // The following arrays are state buffer. 120 unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; 121 int mDistances[MAX_WORD_LENGTH_INTERNAL]; 122 123 // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. 124 // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot. 125 int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)]; 126 127 CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL]; 128 129 // The following member variables are being used as cache values of the correction state. 130 bool mNeedsToTraverseAllNodes; 131 int mOutputIndex; 132 int mInputIndex; 133 134 int mEquivalentCharCount; 135 int mProximityCount; 136 int mExcessiveCount; 137 int mTransposedCount; 138 int mSkippedCount; 139 140 int mTransposedPos; 141 int mExcessivePos; 142 int mSkipPos; 143 144 bool mLastCharExceeded; 145 146 bool mMatching; 147 bool mProximityMatching; 148 bool mExceeding; 149 bool mTransposing; 150 bool mSkipping; 151 152 class RankingAlgorithm { 153 public: 154 static int calculateFinalFreq(const int inputIndex, const int depth, 155 const int freq, int *editDistanceTable, const Correction* correction); 156 static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, 157 const Correction* correction, const unsigned short *word); 158 }; 159 }; 160 } // namespace latinime 161 #endif // LATINIME_CORRECTION_H 162