Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H
     18 #define LATINIME_MULTI_BIGRAM_MAP_H
     19 
     20 #include <cstddef>
     21 
     22 #include "defines.h"
     23 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
     24 #include "suggest/core/dictionary/bloom_filter.h"
     25 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
     26 #include "utils/hash_map_compat.h"
     27 
     28 namespace latinime {
     29 
     30 // Class for caching bigram maps for multiple previous word contexts. This is useful since the
     31 // algorithm needs to look up the set of bigrams for every word pair that occurs in every
     32 // multi-word suggestion.
     33 class MultiBigramMap {
     34  public:
     35     MultiBigramMap() : mBigramMaps() {}
     36     ~MultiBigramMap() {}
     37 
     38     // Look up the bigram probability for the given word pair from the cached bigram maps.
     39     // Also caches the bigrams if there is space remaining and they have not been cached already.
     40     int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
     41             const int wordPosition, const int nextWordPosition, const int unigramProbability) {
     42         hash_map_compat<int, BigramMap>::const_iterator mapPosition =
     43                 mBigramMaps.find(wordPosition);
     44         if (mapPosition != mBigramMaps.end()) {
     45             return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
     46                     unigramProbability);
     47         }
     48         if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
     49             addBigramsForWordPosition(structurePolicy, wordPosition);
     50             return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
     51                     nextWordPosition, unigramProbability);
     52         }
     53         return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
     54                 nextWordPosition, unigramProbability);
     55     }
     56 
     57     void clear() {
     58         mBigramMaps.clear();
     59     }
     60 
     61  private:
     62     DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
     63 
     64     class BigramMap {
     65      public:
     66         BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
     67         ~BigramMap() {}
     68 
     69         void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
     70                 const int nodePos) {
     71             const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
     72             BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
     73                     bigramsListPos);
     74             while (bigramsIt.hasNext()) {
     75                 bigramsIt.next();
     76                 if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
     77                     continue;
     78                 }
     79                 mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
     80                 mBloomFilter.setInFilter(bigramsIt.getBigramPos());
     81             }
     82         }
     83 
     84         AK_FORCE_INLINE int getBigramProbability(
     85                 const DictionaryStructureWithBufferPolicy *const structurePolicy,
     86                 const int nextWordPosition, const int unigramProbability) const {
     87             int bigramProbability = NOT_A_PROBABILITY;
     88             if (mBloomFilter.isInFilter(nextWordPosition)) {
     89                 const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
     90                         mBigramMap.find(nextWordPosition);
     91                 if (bigramProbabilityIt != mBigramMap.end()) {
     92                     bigramProbability = bigramProbabilityIt->second;
     93                 }
     94             }
     95             return structurePolicy->getProbability(unigramProbability, bigramProbability);
     96         }
     97 
     98      private:
     99         // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
    100         // copy constructor is needed for use in hash_map.
    101         static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
    102         hash_map_compat<int, int> mBigramMap;
    103         BloomFilter mBloomFilter;
    104     };
    105 
    106     AK_FORCE_INLINE void addBigramsForWordPosition(
    107             const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
    108         mBigramMaps[position].init(structurePolicy, position);
    109     }
    110 
    111     AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
    112             const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
    113             const int nextWordPosition, const int unigramProbability) {
    114         int bigramProbability = NOT_A_PROBABILITY;
    115         const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
    116         BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
    117                 bigramsListPos);
    118         while (bigramsIt.hasNext()) {
    119             bigramsIt.next();
    120             if (bigramsIt.getBigramPos() == nextWordPosition) {
    121                 bigramProbability = bigramsIt.getProbability();
    122                 break;
    123             }
    124         }
    125         return structurePolicy->getProbability(unigramProbability, bigramProbability);
    126     }
    127 
    128     static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
    129     hash_map_compat<int, BigramMap> mBigramMaps;
    130 };
    131 } // namespace latinime
    132 #endif // LATINIME_MULTI_BIGRAM_MAP_H
    133