Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H
     18 #define LATINIME_MULTI_BIGRAM_MAP_H
     19 
     20 #include <cstddef>
     21 #include <unordered_map>
     22 
     23 #include "defines.h"
     24 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
     25 #include "suggest/core/dictionary/bloom_filter.h"
     26 #include "suggest/core/dictionary/ngram_listener.h"
     27 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
     28 
     29 namespace latinime {
     30 
     31 // Class for caching bigram maps for multiple previous word contexts. This is useful since the
     32 // algorithm needs to look up the set of bigrams for every word pair that occurs in every
     33 // multi-word suggestion.
     34 class MultiBigramMap {
     35  public:
     36     MultiBigramMap() : mBigramMaps() {}
     37     ~MultiBigramMap() {}
     38 
     39     // Look up the bigram probability for the given word pair from the cached bigram maps.
     40     // Also caches the bigrams if there is space remaining and they have not been cached already.
     41     int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
     42             const int *const prevWordsPtNodePos, const int nextWordPosition,
     43             const int unigramProbability);
     44 
     45     void clear() {
     46         mBigramMaps.clear();
     47     }
     48 
     49  private:
     50     DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
     51 
     52     class BigramMap : public NgramListener {
     53      public:
     54         BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
     55         // Copy constructor needed for std::unordered_map.
     56         BigramMap(const BigramMap &bigramMap)
     57                 : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
     58         virtual ~BigramMap() {}
     59 
     60         void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
     61                 const int *const prevWordsPtNodePos);
     62         int getBigramProbability(
     63                 const DictionaryStructureWithBufferPolicy *const structurePolicy,
     64                 const int nextWordPosition, const int unigramProbability) const;
     65         virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
     66 
     67      private:
     68         static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
     69         std::unordered_map<int, int> mBigramMap;
     70         BloomFilter mBloomFilter;
     71     };
     72 
     73     void addBigramsForWordPosition(
     74             const DictionaryStructureWithBufferPolicy *const structurePolicy,
     75             const int *const prevWordsPtNodePos);
     76 
     77     int readBigramProbabilityFromBinaryDictionary(
     78             const DictionaryStructureWithBufferPolicy *const structurePolicy,
     79             const int *const prevWordsPtNodePos, const int nextWordPosition,
     80             const int unigramProbability);
     81 
     82     static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
     83     std::unordered_map<int, BigramMap> mBigramMaps;
     84 };
     85 } // namespace latinime
     86 #endif // LATINIME_MULTI_BIGRAM_MAP_H
     87