Home | History | Annotate | Download | only in fst
      1 // bi-table.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes for representing a bijective mapping between an arbitrary entry
     20 // of type T and a signed integral ID.
     21 
     22 #ifndef FST_LIB_BI_TABLE_H__
     23 #define FST_LIB_BI_TABLE_H__
     24 
     25 #include <deque>
     26 #include <vector>
     27 using std::vector;
     28 
     29 namespace fst {
     30 
     31 // BI TABLES - these determine a bijective mapping between an
     32 // arbitrary entry of type T and an signed integral ID of type I. The IDs are
     33 // allocated starting from 0 in order.
     34 //
     35 // template <class I, class T>
     36 // class BiTable {
     37 //  public:
     38 //
     39 //   // Required constructors.
     40 //   BiTable();
     41 //
     42 //   // Lookup integer ID from entry. If it doesn't exist, then add it.
     43 //   I FindId(const T &entry);
     44 //   // Lookup entry from integer ID.
     45 //   const T &FindEntry(I) const;
     46 //   // # of stored entries.
     47 //   I Size() const;
     48 // };
     49 
     50 // An implementation using a hash map for the entry to ID mapping.
     51 // The  entry T must have == defined and the default constructor
     52 // must produce an entry that will never be seen. H is the hash function.
     53 template <class I, class T, class H>
     54 class HashBiTable {
     55  public:
     56 
     57   HashBiTable() {
     58     T empty_entry;
     59   }
     60 
     61   I FindId(const T &entry) {
     62     I &id_ref = entry2id_[entry];
     63     if (id_ref == 0) {  // T not found; store and assign it a new ID.
     64       id2entry_.push_back(entry);
     65       id_ref = id2entry_.size();
     66     }
     67     return id_ref - 1;  // NB: id_ref = ID + 1
     68   }
     69 
     70   const T &FindEntry(I s) const {
     71     return id2entry_[s];
     72   }
     73 
     74   I Size() const { return id2entry_.size(); }
     75 
     76  private:
     77   unordered_map<T, I, H> entry2id_;
     78   vector<T> id2entry_;
     79 
     80   DISALLOW_COPY_AND_ASSIGN(HashBiTable);
     81 };
     82 
     83 
     84 // An implementation using a hash set for the entry to ID
     85 // mapping.  The hash set holds 'keys' which are either the ID
     86 // or kCurrentKey.  These keys can be mapped to entrys either by
     87 // looking up in the entry vector or, if kCurrentKey, in current_entry_
     88 // member. The hash and key equality functions map to entries first.
     89 // The  entry T must have == defined and the default constructor
     90 // must produce a entry that will never be seen. H is the hash
     91 // function.
     92 template <class I, class T, class H>
     93 class CompactHashBiTable {
     94  public:
     95   friend class HashFunc;
     96   friend class HashEqual;
     97 
     98   CompactHashBiTable()
     99       : hash_func_(*this),
    100         hash_equal_(*this),
    101         keys_(0, hash_func_, hash_equal_) {
    102   }
    103 
    104   // Reserves space for table_size elements.
    105   explicit CompactHashBiTable(size_t table_size)
    106       : hash_func_(*this),
    107         hash_equal_(*this),
    108         keys_(table_size, hash_func_, hash_equal_) {
    109     id2entry_.reserve(table_size);
    110   }
    111 
    112   I FindId(const T &entry) {
    113     current_entry_ = &entry;
    114     typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
    115     if (it == keys_.end()) {
    116       I key = id2entry_.size();
    117       id2entry_.push_back(entry);
    118       keys_.insert(key);
    119       return key;
    120     } else {
    121       return *it;
    122     }
    123   }
    124 
    125   const T &FindEntry(I s) const { return id2entry_[s]; }
    126   I Size() const { return id2entry_.size(); }
    127 
    128  private:
    129   static const I kEmptyKey;    // -1
    130   static const I kCurrentKey;  // -2
    131 
    132   class HashFunc {
    133    public:
    134     HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
    135 
    136     size_t operator()(I k) const { return hf(ht_->Key2T(k)); }
    137    private:
    138     const CompactHashBiTable *ht_;
    139     H hf;
    140   };
    141 
    142   class HashEqual {
    143    public:
    144     HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
    145 
    146     bool operator()(I k1, I k2) const {
    147       return ht_->Key2T(k1) == ht_->Key2T(k2);
    148     }
    149    private:
    150     const CompactHashBiTable *ht_;
    151   };
    152 
    153   typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
    154 
    155   const T &Key2T(I k) const {
    156     if (k == kEmptyKey)
    157       return empty_entry_;
    158     else if (k == kCurrentKey)
    159       return *current_entry_;
    160     else
    161       return id2entry_[k];
    162   }
    163 
    164   HashFunc hash_func_;
    165   HashEqual hash_equal_;
    166   KeyHashSet keys_;
    167   vector<T> id2entry_;
    168   const T empty_entry_;
    169   const T *current_entry_;
    170 
    171   DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable);
    172 };
    173 
    174 template <class I, class T, class H>
    175 const I CompactHashBiTable<I, T, H>::kEmptyKey = -1;
    176 
    177 template <class I, class T, class H>
    178 const I CompactHashBiTable<I, T, H>::kCurrentKey = -2;
    179 
    180 
    181 // An implementation using a vector for the entry to ID mapping.
    182 // It is passed a function object FP that should fingerprint entries
    183 // uniquely to an integer that can used as a vector index. Normally,
    184 // VectorBiTable constructs the FP object.  The user can instead
    185 // pass in this object; in that case, VectorBiTable takes its
    186 // ownership.
    187 template <class I, class T, class FP>
    188 class VectorBiTable {
    189  public:
    190   explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {}
    191 
    192   ~VectorBiTable() { delete fp_; }
    193 
    194   I FindId(const T &entry) {
    195     ssize_t fp = (*fp_)(entry);
    196     if (fp >= fp2id_.size())
    197       fp2id_.resize(fp + 1);
    198     I &id_ref = fp2id_[fp];
    199     if (id_ref == 0) {  // T not found; store and assign it a new ID.
    200       id2entry_.push_back(entry);
    201       id_ref = id2entry_.size();
    202     }
    203     return id_ref - 1;  // NB: id_ref = ID + 1
    204   }
    205 
    206   const T &FindEntry(I s) const { return id2entry_[s]; }
    207 
    208   I Size() const { return id2entry_.size(); }
    209 
    210   const FP &Fingerprint() const { return *fp_; }
    211 
    212  private:
    213   FP *fp_;
    214   vector<I> fp2id_;
    215   vector<T> id2entry_;
    216 
    217   DISALLOW_COPY_AND_ASSIGN(VectorBiTable);
    218 };
    219 
    220 
    221 // An implementation using a vector and a compact hash table. The
    222 // selecting functor S returns true for entries to be hashed in the
    223 // vector.  The fingerprinting functor FP returns a unique fingerprint
    224 // for each entry to be hashed in the vector (these need to be
    225 // suitable for indexing in a vector).  The hash functor H is used when
    226 // hashing entry into the compact hash table.
    227 template <class I, class T, class S, class FP, class H>
    228 class VectorHashBiTable {
    229  public:
    230   friend class HashFunc;
    231   friend class HashEqual;
    232 
    233   VectorHashBiTable(S *s, FP *fp, H *h,
    234                        size_t vector_size = 0,
    235                        size_t entry_size = 0)
    236       : selector_(s),
    237         fp_(fp),
    238         h_(h),
    239         hash_func_(*this),
    240         hash_equal_(*this),
    241         keys_(0, hash_func_, hash_equal_) {
    242     if (vector_size)
    243       fp2id_.reserve(vector_size);
    244     if (entry_size)
    245       id2entry_.reserve(entry_size);
    246   }
    247 
    248   ~VectorHashBiTable() {
    249     delete selector_;
    250     delete fp_;
    251     delete h_;
    252   }
    253 
    254   I FindId(const T &entry) {
    255     if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
    256       uint64 fp = (*fp_)(entry);
    257       if (fp2id_.size() <= fp)
    258         fp2id_.resize(fp + 1, 0);
    259       if (fp2id_[fp] == 0) {
    260         id2entry_.push_back(entry);
    261         fp2id_[fp] = id2entry_.size();
    262       }
    263       return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
    264     } else {  // Use the hash table otherwise.
    265       current_entry_ = &entry;
    266       typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
    267       if (it == keys_.end()) {
    268         I key = id2entry_.size();
    269         id2entry_.push_back(entry);
    270         keys_.insert(key);
    271         return key;
    272       } else {
    273         return *it;
    274       }
    275     }
    276   }
    277 
    278   const T &FindEntry(I s) const {
    279     return id2entry_[s];
    280   }
    281 
    282   I Size() const { return id2entry_.size(); }
    283 
    284   const S &Selector() const { return *selector_; }
    285 
    286   const FP &Fingerprint() const { return *fp_; }
    287 
    288   const H &Hash() const { return *h_; }
    289 
    290  private:
    291   static const I kEmptyKey;
    292   static const I kCurrentKey;
    293 
    294   class HashFunc {
    295    public:
    296     HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
    297 
    298     size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); }
    299    private:
    300     const VectorHashBiTable *ht_;
    301   };
    302 
    303   class HashEqual {
    304    public:
    305     HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
    306 
    307     bool operator()(I k1, I k2) const {
    308       return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
    309     }
    310    private:
    311     const VectorHashBiTable *ht_;
    312   };
    313 
    314   typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
    315 
    316   const T &Key2Entry(I k) const {
    317     if (k == kEmptyKey)
    318       return empty_entry_;
    319     else if (k == kCurrentKey)
    320       return *current_entry_;
    321     else
    322       return id2entry_[k];
    323   }
    324 
    325 
    326   S *selector_;  // Returns true if entry hashed into vector
    327   FP *fp_;       // Fingerprint used when hashing entry into vector
    328   H *h_;         // Hash function used when hashing entry into hash_set
    329 
    330   vector<T> id2entry_;  // Maps state IDs to entry
    331   vector<I> fp2id_;        // Maps entry fingerprints to IDs
    332 
    333   // Compact implementation of the hash table mapping entrys to
    334   // state IDs using the hash function 'h_'
    335   HashFunc hash_func_;
    336   HashEqual hash_equal_;
    337   KeyHashSet keys_;
    338   const T empty_entry_;
    339   const T *current_entry_;
    340 
    341   DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable);
    342 };
    343 
    344 template <class I, class T, class S, class FP, class H>
    345 const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1;
    346 
    347 template <class I, class T, class S, class FP, class H>
    348 const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2;
    349 
    350 
    351 // An implementation using a hash map for the entry to ID
    352 // mapping. This version permits erasing of s.  The  entry T
    353 // must have == defined and its default constructor must produce a
    354 // entry that will never be seen. F is the hash function.
    355 template <class I, class T, class F>
    356 class ErasableBiTable {
    357  public:
    358   ErasableBiTable() : first_(0) {}
    359 
    360   I FindId(const T &entry) {
    361     I &id_ref = entry2id_[entry];
    362     if (id_ref == 0) {  // T not found; store and assign it a new ID.
    363       id2entry_.push_back(entry);
    364       id_ref = id2entry_.size() + first_;
    365     }
    366     return id_ref - 1;  // NB: id_ref = ID + 1
    367   }
    368 
    369   const T &FindEntry(I s) const { return id2entry_[s - first_]; }
    370 
    371   I Size() const { return id2entry_.size(); }
    372 
    373   void Erase(I s) {
    374     T &entry = id2entry_[s - first_];
    375     typename unordered_map<T, I, F>::iterator it =
    376         entry2id_.find(entry);
    377     entry2id_.erase(it);
    378     id2entry_[s - first_] = empty_entry_;
    379     while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
    380       id2entry_.pop_front();
    381       ++first_;
    382     }
    383   }
    384 
    385  private:
    386   unordered_map<T, I, F> entry2id_;
    387   deque<T> id2entry_;
    388   const T empty_entry_;
    389   I first_;        // I of first element in the deque;
    390 
    391   DISALLOW_COPY_AND_ASSIGN(ErasableBiTable);
    392 };
    393 
    394 }  // namespace fst
    395 
    396 #endif  // FST_LIB_BI_TABLE_H__
    397