1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H 18 #define LATINIME_MULTI_BIGRAM_MAP_H 19 20 #include <cstddef> 21 #include <unordered_map> 22 23 #include "defines.h" 24 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" 25 #include "suggest/core/dictionary/bloom_filter.h" 26 #include "suggest/core/dictionary/ngram_listener.h" 27 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 28 29 namespace latinime { 30 31 // Class for caching bigram maps for multiple previous word contexts. This is useful since the 32 // algorithm needs to look up the set of bigrams for every word pair that occurs in every 33 // multi-word suggestion. 34 class MultiBigramMap { 35 public: 36 MultiBigramMap() : mBigramMaps() {} 37 ~MultiBigramMap() {} 38 39 // Look up the bigram probability for the given word pair from the cached bigram maps. 40 // Also caches the bigrams if there is space remaining and they have not been cached already. 41 int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, 42 const int *const prevWordsPtNodePos, const int nextWordPosition, 43 const int unigramProbability); 44 45 void clear() { 46 mBigramMaps.clear(); 47 } 48 49 private: 50 DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); 51 52 class BigramMap : public NgramListener { 53 public: 54 BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} 55 // Copy constructor needed for std::unordered_map. 56 BigramMap(const BigramMap &bigramMap) 57 : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {} 58 virtual ~BigramMap() {} 59 60 void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, 61 const int *const prevWordsPtNodePos); 62 int getBigramProbability( 63 const DictionaryStructureWithBufferPolicy *const structurePolicy, 64 const int nextWordPosition, const int unigramProbability) const; 65 virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); 66 67 private: 68 static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; 69 std::unordered_map<int, int> mBigramMap; 70 BloomFilter mBloomFilter; 71 }; 72 73 void addBigramsForWordPosition( 74 const DictionaryStructureWithBufferPolicy *const structurePolicy, 75 const int *const prevWordsPtNodePos); 76 77 int readBigramProbabilityFromBinaryDictionary( 78 const DictionaryStructureWithBufferPolicy *const structurePolicy, 79 const int *const prevWordsPtNodePos, const int nextWordPosition, 80 const int unigramProbability); 81 82 static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; 83 std::unordered_map<int, BigramMap> mBigramMaps; 84 }; 85 } // namespace latinime 86 #endif // LATINIME_MULTI_BIGRAM_MAP_H 87