Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_CORRECTION_H
     18 #define LATINIME_CORRECTION_H
     19 
     20 #include <stdint.h>
     21 #include "correction_state.h"
     22 
     23 #include "defines.h"
     24 
     25 namespace latinime {
     26 
     27 class ProximityInfo;
     28 
     29 class Correction {
     30 
     31 public:
     32     typedef enum {
     33         TRAVERSE_ALL_ON_TERMINAL,
     34         TRAVERSE_ALL_NOT_ON_TERMINAL,
     35         UNRELATED,
     36         ON_TERMINAL,
     37         NOT_ON_TERMINAL
     38     } CorrectionType;
     39 
     40     Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
     41     void initCorrection(
     42             const ProximityInfo *pi, const int inputLength, const int maxWordLength);
     43     void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
     44 
     45     // TODO: remove
     46     void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
     47             const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
     48     void checkState();
     49     bool initProcessState(const int index);
     50 
     51     int getOutputIndex();
     52     int getInputIndex();
     53 
     54     virtual ~Correction();
     55     int getSpaceProximityPos() const {
     56         return mSpaceProximityPos;
     57     }
     58     int getMissingSpacePos() const {
     59         return mMissingSpacePos;
     60     }
     61 
     62     int getSkipPos() const {
     63         return mSkipPos;
     64     }
     65 
     66     int getExcessivePos() const {
     67         return mExcessivePos;
     68     }
     69 
     70     int getTransposedPos() const {
     71         return mTransposedPos;
     72     }
     73 
     74     bool needsToPrune() const;
     75 
     76     int getFreqForSplitTwoWords(
     77             const int firstFreq, const int secondFreq, const unsigned short *word);
     78     int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
     79 
     80     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
     81 
     82     /////////////////////////
     83     // Tree helper methods
     84     int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
     85 
     86     inline int getTreeSiblingPos(const int index) const {
     87         return mCorrectionStates[index].mSiblingPos;
     88     }
     89 
     90     inline void setTreeSiblingPos(const int index, const int pos) {
     91         mCorrectionStates[index].mSiblingPos = pos;
     92     }
     93 
     94     inline int getTreeParentIndex(const int index) const {
     95         return mCorrectionStates[index].mParentIndex;
     96     }
     97 private:
     98     inline void incrementInputIndex();
     99     inline void incrementOutputIndex();
    100     inline bool needsToTraverseAllNodes();
    101     inline void startToTraverseAllNodes();
    102     inline bool isQuote(const unsigned short c);
    103     inline CorrectionType processSkipChar(
    104             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
    105 
    106     const int TYPED_LETTER_MULTIPLIER;
    107     const int FULL_WORD_MULTIPLIER;
    108     const ProximityInfo *mProximityInfo;
    109 
    110     bool mUseFullEditDistance;
    111     int mMaxEditDistance;
    112     int mMaxDepth;
    113     int mInputLength;
    114     int mSpaceProximityPos;
    115     int mMissingSpacePos;
    116     int mTerminalInputIndex;
    117     int mTerminalOutputIndex;
    118 
    119     // The following arrays are state buffer.
    120     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
    121     int mDistances[MAX_WORD_LENGTH_INTERNAL];
    122 
    123     // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
    124     // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
    125     int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)];
    126 
    127     CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
    128 
    129     // The following member variables are being used as cache values of the correction state.
    130     bool mNeedsToTraverseAllNodes;
    131     int mOutputIndex;
    132     int mInputIndex;
    133 
    134     int mEquivalentCharCount;
    135     int mProximityCount;
    136     int mExcessiveCount;
    137     int mTransposedCount;
    138     int mSkippedCount;
    139 
    140     int mTransposedPos;
    141     int mExcessivePos;
    142     int mSkipPos;
    143 
    144     bool mLastCharExceeded;
    145 
    146     bool mMatching;
    147     bool mProximityMatching;
    148     bool mExceeding;
    149     bool mTransposing;
    150     bool mSkipping;
    151 
    152     class RankingAlgorithm {
    153     public:
    154         static int calculateFinalFreq(const int inputIndex, const int depth,
    155                 const int freq, int *editDistanceTable, const Correction* correction);
    156         static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
    157                 const Correction* correction, const unsigned short *word);
    158     };
    159 };
    160 } // namespace latinime
    161 #endif // LATINIME_CORRECTION_H
    162