/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H

namespace latinime {

// NOTE: preprocessor macros are not scoped by the enclosing namespace; the
// names below are visible to every translation unit that includes this header.

// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
#define ADDRESS_MASK 0x3FFFFF

// The bit that decides if an address follows in the next 22 bits
#define FLAG_ADDRESS_MASK 0x40
// The bit that decides if this is a terminal node for a word. The node could still have children,
// if the word has other endings.
#define FLAG_TERMINAL_MASK 0x80

// Bit flags / masks used when reading bigram data.
// NOTE(review): FLAG_BIGRAM_READ and FLAG_BIGRAM_CONTINUED share the value 0x80;
// presumably they are applied to different bytes -- confirm against the implementation.
#define FLAG_BIGRAM_READ 0x80
#define FLAG_BIGRAM_CHILDEXIST 0x40
#define FLAG_BIGRAM_CONTINUED 0x80
#define FLAG_BIGRAM_FREQ 0x7F

/*
 * Reader for a dictionary stored as a raw byte buffer (see mDict).
 *
 * Only the small inline helpers are defined in this header; every other member
 * is defined out of line. NOTE(review): the purpose of the out-of-line members
 * is only suggested by their names here -- consult the implementation file
 * before relying on any behavior not spelled out below.
 */
class Dictionary {
public:
    // NOTE(review): 'dict' is presumably the buffer later read through mDict,
    // and the two multipliers presumably initialize mTypedLetterMultiplier and
    // mFullWordMultiplier -- confirm in the constructor definition.
    Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier);

    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
            int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
            int *nextLetters, int nextLettersSize);
    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
            unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
            int maxAlternatives);
    bool isValidWord(unsigned short *word, int length);

    // Stores an opaque handle; this header never dereferences it.
    void setAsset(void *asset) { mAsset = asset; }
    // Returns the handle last stored with setAsset().
    void *getAsset() const { return mAsset; }

    ~Dictionary();

private:

    void getVersionNumber();
    bool checkIfDictVersionIsLatest();
    int getAddress(int *pos);
    int getBigramAddress(int *pos, bool advance);
    int getFreq(int *pos);
    int getBigramFreq(int *pos);
    void searchForTerminalNode(int address, int frequency);

    // The three predicates below inspect the byte at mDict[*pos] without
    // advancing *pos.
    // True when bit 0x80 of the byte is set.
    bool getFirstBitOfByte(int *pos) const { return (mDict[*pos] & 0x80) > 0; }
    // True when bit 0x40 of the byte is set.
    bool getSecondBitOfByte(int *pos) const { return (mDict[*pos] & 0x40) > 0; }
    // True when the FLAG_TERMINAL_MASK bit of the byte is set.
    bool getTerminal(int *pos) const { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
    // Returns the byte at mDict[*pos] as a value in 0..255 and advances *pos by one.
    int getCount(int *pos) const { return mDict[(*pos)++] & 0xFF; }
    unsigned short getChar(int *pos);
    int wideStrLen(unsigned short *str);

    bool sameAsTyped(unsigned short *word, int length);
    bool checkFirstCharacter(unsigned short *word);
    bool addWord(unsigned short *word, int length, int frequency);
    bool addWordBigram(unsigned short *word, int length, int frequency);
    unsigned short toLowerCase(unsigned short c);
    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
            int inputIndex, int diffs);
    int isValidWordRec(int pos, unsigned short *word, int offset, int length);
    void registerNextLetter(unsigned short c);

    // Dictionary bytes indexed by the inline helpers above.
    unsigned char *mDict;
    // Opaque handle managed through setAsset()/getAsset().
    void *mAsset;

    int *mFrequencies;
    int *mBigramFreq;
    int mMaxWords;
    int mMaxBigrams;
    int mMaxWordLength;
    unsigned short *mOutputChars;
    unsigned short *mBigramChars;
    int *mInputCodes;
    int mInputLength;
    int mMaxAlternatives;
    // Fixed-size buffer of 128 16-bit characters.
    // NOTE(review): presumably scratch space for the word being built; verify
    // that callers keep word lengths within this bound.
    unsigned short mWord[128];
    int mSkipPos;
    int mMaxEditDistance;

    int mFullWordMultiplier;
    int mTypedLetterMultiplier;
    int *mNextLettersFrequencies;
    int mNextLettersSize;
    int mVersion;
    int mBigram;
};

// ----------------------------------------------------------------------------

} // namespace latinime

#endif // LATINIME_DICTIONARY_H