1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_BIGRAM_DICTIONARY_H 18 #define LATINIME_BIGRAM_DICTIONARY_H 19 20 #include <map> 21 #include <stdint.h> 22 23 #include "defines.h" 24 25 namespace latinime { 26 27 class Dictionary; 28 class BigramDictionary { 29 public: 30 BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary); 31 int getBigrams(const int32_t *word, int length, int *codes, int codesSize, 32 unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams); 33 int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength); 34 void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, 35 std::map<int, int> *map, uint8_t *filter); 36 bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); 37 ~BigramDictionary(); 38 private: 39 bool addWordBigram(unsigned short *word, int length, int frequency); 40 int getBigramAddress(int *pos, bool advance); 41 int getBigramFreq(int *pos); 42 void searchForTerminalNode(int addressLookingFor, int frequency); 43 bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } 44 bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } 45 bool checkFirstCharacter(unsigned short *word); 46 47 const unsigned char *DICT; 48 const int MAX_WORD_LENGTH; 49 // TODO: Re-implement proximity correction for bigram correction 50 static const int MAX_ALTERNATIVES = 1; 51 52 Dictionary *mParentDictionary; 53 int *mBigramFreq; 54 int mMaxBigrams; 55 unsigned short *mBigramChars; 56 int *mInputCodes; 57 int mInputLength; 58 }; 59 60 } // namespace latinime 61 62 #endif // LATINIME_BIGRAM_DICTIONARY_H 63