1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H 18 #define LATINIME_MULTI_BIGRAM_MAP_H 19 20 #include <cstddef> 21 22 #include "defines.h" 23 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" 24 #include "suggest/core/dictionary/bloom_filter.h" 25 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 26 #include "utils/hash_map_compat.h" 27 28 namespace latinime { 29 30 // Class for caching bigram maps for multiple previous word contexts. This is useful since the 31 // algorithm needs to look up the set of bigrams for every word pair that occurs in every 32 // multi-word suggestion. 33 class MultiBigramMap { 34 public: 35 MultiBigramMap() : mBigramMaps() {} 36 ~MultiBigramMap() {} 37 38 // Look up the bigram probability for the given word pair from the cached bigram maps. 39 // Also caches the bigrams if there is space remaining and they have not been cached already. 40 int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, 41 const int wordPosition, const int nextWordPosition, const int unigramProbability) { 42 hash_map_compat<int, BigramMap>::const_iterator mapPosition = 43 mBigramMaps.find(wordPosition); 44 if (mapPosition != mBigramMaps.end()) { 45 return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, 46 unigramProbability); 47 } 48 if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { 49 addBigramsForWordPosition(structurePolicy, wordPosition); 50 return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, 51 nextWordPosition, unigramProbability); 52 } 53 return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, 54 nextWordPosition, unigramProbability); 55 } 56 57 void clear() { 58 mBigramMaps.clear(); 59 } 60 61 private: 62 DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); 63 64 class BigramMap { 65 public: 66 BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} 67 ~BigramMap() {} 68 69 void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, 70 const int nodePos) { 71 const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); 72 BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), 73 bigramsListPos); 74 while (bigramsIt.hasNext()) { 75 bigramsIt.next(); 76 if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { 77 continue; 78 } 79 mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); 80 mBloomFilter.setInFilter(bigramsIt.getBigramPos()); 81 } 82 } 83 84 AK_FORCE_INLINE int getBigramProbability( 85 const DictionaryStructureWithBufferPolicy *const structurePolicy, 86 const int nextWordPosition, const int unigramProbability) const { 87 int bigramProbability = NOT_A_PROBABILITY; 88 if (mBloomFilter.isInFilter(nextWordPosition)) { 89 const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = 90 mBigramMap.find(nextWordPosition); 91 if (bigramProbabilityIt != mBigramMap.end()) { 92 bigramProbability = bigramProbabilityIt->second; 93 } 94 } 95 return structurePolicy->getProbability(unigramProbability, bigramProbability); 96 } 97 98 private: 99 // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default 100 // copy constructor is needed for use in hash_map. 101 static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; 102 hash_map_compat<int, int> mBigramMap; 103 BloomFilter mBloomFilter; 104 }; 105 106 AK_FORCE_INLINE void addBigramsForWordPosition( 107 const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { 108 mBigramMaps[position].init(structurePolicy, position); 109 } 110 111 AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( 112 const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, 113 const int nextWordPosition, const int unigramProbability) { 114 int bigramProbability = NOT_A_PROBABILITY; 115 const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); 116 BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), 117 bigramsListPos); 118 while (bigramsIt.hasNext()) { 119 bigramsIt.next(); 120 if (bigramsIt.getBigramPos() == nextWordPosition) { 121 bigramProbability = bigramsIt.getProbability(); 122 break; 123 } 124 } 125 return structurePolicy->getProbability(unigramProbability, bigramProbability); 126 } 127 128 static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; 129 hash_map_compat<int, BigramMap> mBigramMaps; 130 }; 131 } // namespace latinime 132 #endif // LATINIME_MULTI_BIGRAM_MAP_H 133