Home | History | Annotate | Download | only in fst
      1 // bi-table.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes for representing a bijective mapping between an arbitrary entry
     20 // of type T and a signed integral ID.
     21 
     22 #ifndef FST_LIB_BI_TABLE_H__
     23 #define FST_LIB_BI_TABLE_H__
     24 
     25 #include <deque>
     26 using std::deque;
     27 #include <functional>
     28 #include <vector>
     29 using std::vector;
     30 
     31 #include <tr1/unordered_set>
     32 using std::tr1::unordered_set;
     33 using std::tr1::unordered_multiset;
     34 
     35 namespace fst {
     36 
     37 // BI TABLES - these determine a bijective mapping between an
     38 // arbitrary entry of type T and an signed integral ID of type I. The IDs are
     39 // allocated starting from 0 in order.
     40 //
     41 // template <class I, class T>
     42 // class BiTable {
     43 //  public:
     44 //
     45 //   // Required constructors.
     46 //   BiTable();
     47 //
     48 //   // Lookup integer ID from entry. If it doesn't exist and 'insert'
     49 //   / is true, then add it. Otherwise return -1.
     50 //   I FindId(const T &entry, bool insert = true);
     51 //   // Lookup entry from integer ID.
     52 //   const T &FindEntry(I) const;
     53 //   // # of stored entries.
     54 //   I Size() const;
     55 // };
     56 
     57 // An implementation using a hash map for the entry to ID mapping.
     58 // H is the hash function and E is the equality function.
     59 // If passed to the constructor, ownership is given to this class.
     60 
     61 template <class I, class T, class H, class E = std::equal_to<T> >
     62 class HashBiTable {
     63  public:
     64   // Reserves space for 'table_size' elements.
     65   explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
     66       : hash_func_(h),
     67         hash_equal_(e),
     68         entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
     69     if (table_size)
     70       id2entry_.reserve(table_size);
     71   }
     72 
     73   HashBiTable(const HashBiTable<I, T, H, E> &table)
     74       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
     75         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
     76         entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
     77                   table.entry2id_.size(),
     78                   (hash_func_ ? *hash_func_ : H()),
     79                   (hash_equal_ ? *hash_equal_ : E())),
     80         id2entry_(table.id2entry_) { }
     81 
     82   ~HashBiTable() {
     83     delete hash_func_;
     84     delete hash_equal_;
     85   }
     86 
     87   I FindId(const T &entry, bool insert = true) {
     88     I &id_ref = entry2id_[entry];
     89     if (id_ref == 0) {  // T not found
     90       if (insert) {     // store and assign it a new ID
     91         id2entry_.push_back(entry);
     92         id_ref = id2entry_.size();
     93       } else {
     94         return -1;
     95       }
     96     }
     97     return id_ref - 1;  // NB: id_ref = ID + 1
     98   }
     99 
    100   const T &FindEntry(I s) const {
    101     return id2entry_[s];
    102   }
    103 
    104   I Size() const { return id2entry_.size(); }
    105 
    106  private:
    107   H *hash_func_;
    108   E *hash_equal_;
    109   unordered_map<T, I, H, E> entry2id_;
    110   vector<T> id2entry_;
    111 
    112   void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
    113 };
    114 
    115 
    116 // Enables alternative hash set representations below.
    117 // typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
    118 typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
    119 
    120 // Default hash set is STL hash_set
    121 template<class K, class H, class E, HSType>
    122 struct  HashSet : public unordered_set<K, H, E> {
    123   HashSet(size_t n = 0, const H &h = H(), const E &e = E())
    124       : unordered_set<K, H, E>(n, h, e)  { }
    125 
    126   void rehash(size_t n) { }
    127 };
    128 
    129 
    130 // An implementation using a hash set for the entry to ID mapping.
    131 // The hash set holds 'keys' which are either the ID or kCurrentKey.
    132 // These keys can be mapped to entrys either by looking up in the
    133 // entry vector or, if kCurrentKey, in current_entry_ member. The hash
    134 // and key equality functions map to entries first. H
    135 // is the hash function and E is the equality function. If passed to
    136 // the constructor, ownership is given to this class.
    137 template <class I, class T, class H,
    138           class E = std::equal_to<T>, HSType HS = HS_DENSE>
    139 class CompactHashBiTable {
    140  public:
    141   friend class HashFunc;
    142   friend class HashEqual;
    143 
    144   // Reserves space for 'table_size' elements.
    145   explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
    146       : hash_func_(h),
    147         hash_equal_(e),
    148         compact_hash_func_(*this),
    149         compact_hash_equal_(*this),
    150         keys_(table_size, compact_hash_func_, compact_hash_equal_) {
    151     if (table_size)
    152       id2entry_.reserve(table_size);
    153   }
    154 
    155   CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
    156       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
    157         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
    158         compact_hash_func_(*this),
    159         compact_hash_equal_(*this),
    160         keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
    161         id2entry_(table.id2entry_) {
    162     keys_.insert(table.keys_.begin(), table.keys_.end());
    163  }
    164 
    165   ~CompactHashBiTable() {
    166     delete hash_func_;
    167     delete hash_equal_;
    168   }
    169 
    170   I FindId(const T &entry, bool insert = true) {
    171     current_entry_ = &entry;
    172     typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
    173     if (it == keys_.end()) {  // T not found
    174       if (insert) {           // store and assign it a new ID
    175         I key = id2entry_.size();
    176         id2entry_.push_back(entry);
    177         keys_.insert(key);
    178         return key;
    179       } else {
    180         return -1;
    181       }
    182     } else {
    183       return *it;
    184     }
    185   }
    186 
    187   const T &FindEntry(I s) const { return id2entry_[s]; }
    188 
    189   I Size() const { return id2entry_.size(); }
    190 
    191   // Clear content. With argument, erases last n IDs.
    192   void Clear(ssize_t n = -1) {
    193     if (n < 0 || n > id2entry_.size())
    194       n = id2entry_.size();
    195     while (n-- > 0) {
    196       I key = id2entry_.size() - 1;
    197       keys_.erase(key);
    198       id2entry_.pop_back();
    199     }
    200     keys_.rehash(0);
    201   }
    202 
    203  private:
    204   static const I kCurrentKey;    // -1
    205   static const I kEmptyKey;      // -2
    206   static const I kDeletedKey;    // -3
    207 
    208   class HashFunc {
    209    public:
    210     HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
    211 
    212     size_t operator()(I k) const {
    213       if (k >= kCurrentKey) {
    214         return (*ht_->hash_func_)(ht_->Key2Entry(k));
    215       } else {
    216         return 0;
    217       }
    218     }
    219 
    220    private:
    221     const CompactHashBiTable *ht_;
    222   };
    223 
    224   class HashEqual {
    225    public:
    226     HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
    227 
    228     bool operator()(I k1, I k2) const {
    229       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
    230         return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
    231       } else {
    232         return k1 == k2;
    233       }
    234     }
    235    private:
    236     const CompactHashBiTable *ht_;
    237   };
    238 
    239   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
    240 
    241   const T &Key2Entry(I k) const {
    242     if (k == kCurrentKey)
    243       return *current_entry_;
    244     else
    245       return id2entry_[k];
    246   }
    247 
    248   H *hash_func_;
    249   E *hash_equal_;
    250   HashFunc compact_hash_func_;
    251   HashEqual compact_hash_equal_;
    252   KeyHashSet keys_;
    253   vector<T> id2entry_;
    254   const T *current_entry_;
    255 
    256   void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
    257 };
    258 
    259 
    260 template <class I, class T, class H, class E, HSType HS>
    261 const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
    262 
    263 template <class I, class T, class H, class E, HSType HS>
    264 const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
    265 
    266 template <class I, class T, class H, class E, HSType HS>
    267 const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
    268 
    269 
    270 // An implementation using a vector for the entry to ID mapping.
    271 // It is passed a function object FP that should fingerprint entries
    272 // uniquely to an integer that can used as a vector index. Normally,
    273 // VectorBiTable constructs the FP object.  The user can instead
    274 // pass in this object; in that case, VectorBiTable takes its
    275 // ownership.
    276 template <class I, class T, class FP>
    277 class VectorBiTable {
    278  public:
    279   // Reserves space for 'table_size' elements.
    280   explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
    281       : fp_(fp ? fp : new FP()) {
    282     if (table_size)
    283       id2entry_.reserve(table_size);
    284   }
    285 
    286   VectorBiTable(const VectorBiTable<I, T, FP> &table)
    287       : fp_(table.fp_ ? new FP(*table.fp_) : 0),
    288         fp2id_(table.fp2id_),
    289         id2entry_(table.id2entry_) { }
    290 
    291   ~VectorBiTable() { delete fp_; }
    292 
    293   I FindId(const T &entry, bool insert = true) {
    294     ssize_t fp = (*fp_)(entry);
    295     if (fp >= fp2id_.size())
    296       fp2id_.resize(fp + 1);
    297     I &id_ref = fp2id_[fp];
    298     if (id_ref == 0) {  // T not found
    299       if (insert) {     // store and assign it a new ID
    300         id2entry_.push_back(entry);
    301         id_ref = id2entry_.size();
    302       } else {
    303         return -1;
    304       }
    305     }
    306     return id_ref - 1;  // NB: id_ref = ID + 1
    307   }
    308 
    309   const T &FindEntry(I s) const { return id2entry_[s]; }
    310 
    311   I Size() const { return id2entry_.size(); }
    312 
    313   const FP &Fingerprint() const { return *fp_; }
    314 
    315  private:
    316   FP *fp_;
    317   vector<I> fp2id_;
    318   vector<T> id2entry_;
    319 
    320   void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
    321 };
    322 
    323 
    324 // An implementation using a vector and a compact hash table. The
    325 // selecting functor S returns true for entries to be hashed in the
    326 // vector.  The fingerprinting functor FP returns a unique fingerprint
    327 // for each entry to be hashed in the vector (these need to be
    328 // suitable for indexing in a vector).  The hash functor H is used
    329 // when hashing entry into the compact hash table.  If passed to the
    330 // constructor, ownership is given to this class.
    331 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
    332 class VectorHashBiTable {
    333  public:
    334   friend class HashFunc;
    335   friend class HashEqual;
    336 
    337   explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
    338                              size_t vector_size = 0,
    339                              size_t entry_size = 0)
    340       : selector_(s),
    341         fp_(fp ? fp : new FP()),
    342         h_(h ? h : new H()),
    343         hash_func_(*this),
    344         hash_equal_(*this),
    345         keys_(0, hash_func_, hash_equal_) {
    346     if (vector_size)
    347       fp2id_.reserve(vector_size);
    348     if (entry_size)
    349       id2entry_.reserve(entry_size);
    350  }
    351 
    352   VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
    353       : selector_(new S(table.s_)),
    354         fp_(table.fp_ ? new FP(*table.fp_) : 0),
    355         h_(table.h_ ? new H(*table.h_) : 0),
    356         id2entry_(table.id2entry_),
    357         fp2id_(table.fp2id_),
    358         hash_func_(*this),
    359         hash_equal_(*this),
    360         keys_(table.keys_.size(), hash_func_, hash_equal_) {
    361      keys_.insert(table.keys_.begin(), table.keys_.end());
    362   }
    363 
    364   ~VectorHashBiTable() {
    365     delete selector_;
    366     delete fp_;
    367     delete h_;
    368   }
    369 
    370   I FindId(const T &entry, bool insert = true) {
    371     if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
    372       uint64 fp = (*fp_)(entry);
    373       if (fp2id_.size() <= fp)
    374         fp2id_.resize(fp + 1, 0);
    375       if (fp2id_[fp] == 0) {         // T not found
    376         if (insert) {                // store and assign it a new ID
    377           id2entry_.push_back(entry);
    378           fp2id_[fp] = id2entry_.size();
    379         } else {
    380           return -1;
    381         }
    382       }
    383       return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
    384     } else {  // Use the hash table otherwise.
    385       current_entry_ = &entry;
    386       typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
    387       if (it == keys_.end()) {
    388         if (insert) {
    389           I key = id2entry_.size();
    390           id2entry_.push_back(entry);
    391           keys_.insert(key);
    392           return key;
    393         } else {
    394           return -1;
    395         }
    396       } else {
    397         return *it;
    398       }
    399     }
    400   }
    401 
    402   const T &FindEntry(I s) const {
    403     return id2entry_[s];
    404   }
    405 
    406   I Size() const { return id2entry_.size(); }
    407 
    408   const S &Selector() const { return *selector_; }
    409 
    410   const FP &Fingerprint() const { return *fp_; }
    411 
    412   const H &Hash() const { return *h_; }
    413 
    414  private:
    415   static const I kCurrentKey;  // -1
    416   static const I kEmptyKey;    // -2
    417 
    418   class HashFunc {
    419    public:
    420     HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
    421 
    422     size_t operator()(I k) const {
    423       if (k >= kCurrentKey) {
    424         return (*(ht_->h_))(ht_->Key2Entry(k));
    425       } else {
    426         return 0;
    427       }
    428     }
    429    private:
    430     const VectorHashBiTable *ht_;
    431   };
    432 
    433   class HashEqual {
    434    public:
    435     HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
    436 
    437     bool operator()(I k1, I k2) const {
    438       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
    439         return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
    440       } else {
    441         return k1 == k2;
    442       }
    443     }
    444    private:
    445     const VectorHashBiTable *ht_;
    446   };
    447 
    448   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
    449 
    450   const T &Key2Entry(I k) const {
    451     if (k == kCurrentKey)
    452       return *current_entry_;
    453     else
    454       return id2entry_[k];
    455   }
    456 
    457   S *selector_;  // Returns true if entry hashed into vector
    458   FP *fp_;       // Fingerprint used when hashing entry into vector
    459   H *h_;         // Hash function used when hashing entry into hash_set
    460 
    461   vector<T> id2entry_;  // Maps state IDs to entry
    462   vector<I> fp2id_;     // Maps entry fingerprints to IDs
    463 
    464   // Compact implementation of the hash table mapping entrys to
    465   // state IDs using the hash function 'h_'
    466   HashFunc hash_func_;
    467   HashEqual hash_equal_;
    468   KeyHashSet keys_;
    469   const T *current_entry_;
    470 
    471   // disallow
    472   void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
    473 };
    474 
    475 template <class I, class T, class S, class FP, class H, HSType HS>
    476 const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
    477 
    478 template <class I, class T, class S, class FP, class H, HSType HS>
    479 const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
    480 
    481 
    482 // An implementation using a hash map for the entry to ID
    483 // mapping. This version permits erasing of arbitrary states.  The
    484 // entry T must have == defined and its default constructor must
    485 // produce a entry that will never be seen. F is the hash function.
    486 template <class I, class T, class F>
    487 class ErasableBiTable {
    488  public:
    489   ErasableBiTable() : first_(0) {}
    490 
    491   I FindId(const T &entry, bool insert = true) {
    492     I &id_ref = entry2id_[entry];
    493     if (id_ref == 0) {  // T not found
    494       if (insert) {     // store and assign it a new ID
    495         id2entry_.push_back(entry);
    496         id_ref = id2entry_.size() + first_;
    497       } else {
    498         return -1;
    499       }
    500     }
    501     return id_ref - 1;  // NB: id_ref = ID + 1
    502   }
    503 
    504   const T &FindEntry(I s) const { return id2entry_[s - first_]; }
    505 
    506   I Size() const { return id2entry_.size(); }
    507 
    508   void Erase(I s) {
    509     T &entry = id2entry_[s - first_];
    510     typename unordered_map<T, I, F>::iterator it =
    511         entry2id_.find(entry);
    512     entry2id_.erase(it);
    513     id2entry_[s - first_] = empty_entry_;
    514     while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
    515       id2entry_.pop_front();
    516       ++first_;
    517     }
    518   }
    519 
    520  private:
    521   unordered_map<T, I, F> entry2id_;
    522   deque<T> id2entry_;
    523   const T empty_entry_;
    524   I first_;        // I of first element in the deque;
    525 
    526   // disallow
    527   void operator=(const ErasableBiTable<I, T, F> &table);  //disallow
    528 };
    529 
    530 }  // namespace fst
    531 
    532 #endif  // FST_LIB_BI_TABLE_H__
    533