/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

     17 #ifndef LATINIME_UNIGRAM_DICTIONARY_H
     18 #define LATINIME_UNIGRAM_DICTIONARY_H
     19 
     20 #include <map>
     21 #include <stdint.h>
     22 #include "defines.h"
     23 #include "digraph_utils.h"
     24 
     25 namespace latinime {
     26 
     27 class Correction;
     28 class ProximityInfo;
     29 class TerminalAttributes;
     30 class WordsPriorityQueuePool;
     31 
     32 class UnigramDictionary {
     33  public:
     34     // Error tolerances
     35     static const int DEFAULT_MAX_ERRORS = 2;
     36     static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
     37 
     38     static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
     39     static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
     40     static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
     41     UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags);
     42     int getProbability(const int *const inWord, const int length) const;
     43     int getBigramPosition(int pos, int *word, int offset, int length) const;
     44     int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
     45             const int *ycoordinates, const int *inputCodePoints, const int inputSize,
     46             const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
     47             const bool useFullEditDistance, int *outWords, int *frequencies,
     48             int *outputTypes) const;
     49     int getDictFlags() const { return DICT_FLAGS; }
     50     virtual ~UnigramDictionary();
     51 
     52  private:
     53     DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
     54     void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
     55             const int *ycoordinates, const int *inputCodePoints, const int inputSize,
     56             const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
     57             const bool useFullEditDistance, Correction *correction,
     58             WordsPriorityQueuePool *queuePool) const;
     59     int getDigraphReplacement(const int *codes, const int i, const int inputSize,
     60             const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const;
     61     void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
     62             const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
     63             int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
     64             const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
     65             const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
     66             WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs,
     67             const unsigned int digraphsSize) const;
     68     void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
     69             const int *ycoordinates, const int *codes, const int inputSize,
     70             Correction *correction) const;
     71     void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
     72             const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
     73             const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize,
     74             Correction *correction, WordsPriorityQueuePool *queuePool) const;
     75     void getSuggestionCandidates(
     76             const bool useFullEditDistance, const int inputSize,
     77             const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
     78             Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
     79             const int maxErrors, const int currentWordIndex) const;
     80     void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
     81             const int *ycoordinates, const int *codes, const bool useFullEditDistance,
     82             const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
     83             const bool hasAutoCorrectionCandidate) const;
     84     void onTerminal(const int freq, const TerminalAttributes &terminalAttributes,
     85             Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
     86             const int currentWordIndex) const;
     87     // Process a node by considering proximity, missing and excessive character
     88     bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
     89             const uint8_t *bigramFilter, Correction *correction, int *newCount,
     90             int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
     91             const int currentWordIndex) const;
     92     int getMostProbableWordLike(const int startInputIndex, const int inputSize,
     93             Correction *correction, int *word) const;
     94     int getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
     95             int *outWord) const;
     96     int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates,
     97             const int *ycoordinates, const int *codes, const bool useFullEditDistance,
     98             Correction *correction, WordsPriorityQueuePool *queuePool, const int inputSize,
     99             const bool hasAutoCorrectionCandidate, const int currentWordIndex,
    100             const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos,
    101             const bool isSpaceProximity, int *freqArray, int *wordLengthArray, int *outputWord,
    102             int *outputWordLength) const;
    103     void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
    104             const int *ycoordinates, const int *codes, const bool useFullEditDistance,
    105             const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
    106             const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
    107             const int outputWordLength, int *freqArray, int *wordLengthArray,
    108             int *outputWord) const;
    109 
    110     const uint8_t *const DICT_ROOT;
    111     const int ROOT_POS;
    112     const int MAX_DIGRAPH_SEARCH_DEPTH;
    113     const int DICT_FLAGS;
    114 };
    115 } // namespace latinime
    116 #endif // LATINIME_UNIGRAM_DICTIONARY_H
    117