1 // bi-table.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Classes for representing a bijective mapping between an arbitrary entry 20 // of type T and a signed integral ID. 21 22 #ifndef FST_LIB_BI_TABLE_H__ 23 #define FST_LIB_BI_TABLE_H__ 24 25 #include <deque> 26 #include <vector> 27 using std::vector; 28 29 namespace fst { 30 31 // BI TABLES - these determine a bijective mapping between an 32 // arbitrary entry of type T and an signed integral ID of type I. The IDs are 33 // allocated starting from 0 in order. 34 // 35 // template <class I, class T> 36 // class BiTable { 37 // public: 38 // 39 // // Required constructors. 40 // BiTable(); 41 // 42 // // Lookup integer ID from entry. If it doesn't exist, then add it. 43 // I FindId(const T &entry); 44 // // Lookup entry from integer ID. 45 // const T &FindEntry(I) const; 46 // // # of stored entries. 47 // I Size() const; 48 // }; 49 50 // An implementation using a hash map for the entry to ID mapping. 51 // The entry T must have == defined and the default constructor 52 // must produce an entry that will never be seen. H is the hash function. 53 template <class I, class T, class H> 54 class HashBiTable { 55 public: 56 57 HashBiTable() { 58 T empty_entry; 59 } 60 61 I FindId(const T &entry) { 62 I &id_ref = entry2id_[entry]; 63 if (id_ref == 0) { // T not found; store and assign it a new ID. 64 id2entry_.push_back(entry); 65 id_ref = id2entry_.size(); 66 } 67 return id_ref - 1; // NB: id_ref = ID + 1 68 } 69 70 const T &FindEntry(I s) const { 71 return id2entry_[s]; 72 } 73 74 I Size() const { return id2entry_.size(); } 75 76 private: 77 unordered_map<T, I, H> entry2id_; 78 vector<T> id2entry_; 79 80 DISALLOW_COPY_AND_ASSIGN(HashBiTable); 81 }; 82 83 84 // An implementation using a hash set for the entry to ID 85 // mapping. The hash set holds 'keys' which are either the ID 86 // or kCurrentKey. These keys can be mapped to entrys either by 87 // looking up in the entry vector or, if kCurrentKey, in current_entry_ 88 // member. The hash and key equality functions map to entries first. 89 // The entry T must have == defined and the default constructor 90 // must produce a entry that will never be seen. H is the hash 91 // function. 92 template <class I, class T, class H> 93 class CompactHashBiTable { 94 public: 95 friend class HashFunc; 96 friend class HashEqual; 97 98 CompactHashBiTable() 99 : hash_func_(*this), 100 hash_equal_(*this), 101 keys_(0, hash_func_, hash_equal_) { 102 } 103 104 // Reserves space for table_size elements. 105 explicit CompactHashBiTable(size_t table_size) 106 : hash_func_(*this), 107 hash_equal_(*this), 108 keys_(table_size, hash_func_, hash_equal_) { 109 id2entry_.reserve(table_size); 110 } 111 112 I FindId(const T &entry) { 113 current_entry_ = &entry; 114 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 115 if (it == keys_.end()) { 116 I key = id2entry_.size(); 117 id2entry_.push_back(entry); 118 keys_.insert(key); 119 return key; 120 } else { 121 return *it; 122 } 123 } 124 125 const T &FindEntry(I s) const { return id2entry_[s]; } 126 I Size() const { return id2entry_.size(); } 127 128 private: 129 static const I kEmptyKey; // -1 130 static const I kCurrentKey; // -2 131 132 class HashFunc { 133 public: 134 HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 135 136 size_t operator()(I k) const { return hf(ht_->Key2T(k)); } 137 private: 138 const CompactHashBiTable *ht_; 139 H hf; 140 }; 141 142 class HashEqual { 143 public: 144 HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 145 146 bool operator()(I k1, I k2) const { 147 return ht_->Key2T(k1) == ht_->Key2T(k2); 148 } 149 private: 150 const CompactHashBiTable *ht_; 151 }; 152 153 typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet; 154 155 const T &Key2T(I k) const { 156 if (k == kEmptyKey) 157 return empty_entry_; 158 else if (k == kCurrentKey) 159 return *current_entry_; 160 else 161 return id2entry_[k]; 162 } 163 164 HashFunc hash_func_; 165 HashEqual hash_equal_; 166 KeyHashSet keys_; 167 vector<T> id2entry_; 168 const T empty_entry_; 169 const T *current_entry_; 170 171 DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable); 172 }; 173 174 template <class I, class T, class H> 175 const I CompactHashBiTable<I, T, H>::kEmptyKey = -1; 176 177 template <class I, class T, class H> 178 const I CompactHashBiTable<I, T, H>::kCurrentKey = -2; 179 180 181 // An implementation using a vector for the entry to ID mapping. 182 // It is passed a function object FP that should fingerprint entries 183 // uniquely to an integer that can used as a vector index. Normally, 184 // VectorBiTable constructs the FP object. The user can instead 185 // pass in this object; in that case, VectorBiTable takes its 186 // ownership. 187 template <class I, class T, class FP> 188 class VectorBiTable { 189 public: 190 explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {} 191 192 ~VectorBiTable() { delete fp_; } 193 194 I FindId(const T &entry) { 195 ssize_t fp = (*fp_)(entry); 196 if (fp >= fp2id_.size()) 197 fp2id_.resize(fp + 1); 198 I &id_ref = fp2id_[fp]; 199 if (id_ref == 0) { // T not found; store and assign it a new ID. 200 id2entry_.push_back(entry); 201 id_ref = id2entry_.size(); 202 } 203 return id_ref - 1; // NB: id_ref = ID + 1 204 } 205 206 const T &FindEntry(I s) const { return id2entry_[s]; } 207 208 I Size() const { return id2entry_.size(); } 209 210 const FP &Fingerprint() const { return *fp_; } 211 212 private: 213 FP *fp_; 214 vector<I> fp2id_; 215 vector<T> id2entry_; 216 217 DISALLOW_COPY_AND_ASSIGN(VectorBiTable); 218 }; 219 220 221 // An implementation using a vector and a compact hash table. The 222 // selecting functor S returns true for entries to be hashed in the 223 // vector. The fingerprinting functor FP returns a unique fingerprint 224 // for each entry to be hashed in the vector (these need to be 225 // suitable for indexing in a vector). The hash functor H is used when 226 // hashing entry into the compact hash table. 227 template <class I, class T, class S, class FP, class H> 228 class VectorHashBiTable { 229 public: 230 friend class HashFunc; 231 friend class HashEqual; 232 233 VectorHashBiTable(S *s, FP *fp, H *h, 234 size_t vector_size = 0, 235 size_t entry_size = 0) 236 : selector_(s), 237 fp_(fp), 238 h_(h), 239 hash_func_(*this), 240 hash_equal_(*this), 241 keys_(0, hash_func_, hash_equal_) { 242 if (vector_size) 243 fp2id_.reserve(vector_size); 244 if (entry_size) 245 id2entry_.reserve(entry_size); 246 } 247 248 ~VectorHashBiTable() { 249 delete selector_; 250 delete fp_; 251 delete h_; 252 } 253 254 I FindId(const T &entry) { 255 if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 256 uint64 fp = (*fp_)(entry); 257 if (fp2id_.size() <= fp) 258 fp2id_.resize(fp + 1, 0); 259 if (fp2id_[fp] == 0) { 260 id2entry_.push_back(entry); 261 fp2id_[fp] = id2entry_.size(); 262 } 263 return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 264 } else { // Use the hash table otherwise. 265 current_entry_ = &entry; 266 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 267 if (it == keys_.end()) { 268 I key = id2entry_.size(); 269 id2entry_.push_back(entry); 270 keys_.insert(key); 271 return key; 272 } else { 273 return *it; 274 } 275 } 276 } 277 278 const T &FindEntry(I s) const { 279 return id2entry_[s]; 280 } 281 282 I Size() const { return id2entry_.size(); } 283 284 const S &Selector() const { return *selector_; } 285 286 const FP &Fingerprint() const { return *fp_; } 287 288 const H &Hash() const { return *h_; } 289 290 private: 291 static const I kEmptyKey; 292 static const I kCurrentKey; 293 294 class HashFunc { 295 public: 296 HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 297 298 size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); } 299 private: 300 const VectorHashBiTable *ht_; 301 }; 302 303 class HashEqual { 304 public: 305 HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 306 307 bool operator()(I k1, I k2) const { 308 return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 309 } 310 private: 311 const VectorHashBiTable *ht_; 312 }; 313 314 typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet; 315 316 const T &Key2Entry(I k) const { 317 if (k == kEmptyKey) 318 return empty_entry_; 319 else if (k == kCurrentKey) 320 return *current_entry_; 321 else 322 return id2entry_[k]; 323 } 324 325 326 S *selector_; // Returns true if entry hashed into vector 327 FP *fp_; // Fingerprint used when hashing entry into vector 328 H *h_; // Hash function used when hashing entry into hash_set 329 330 vector<T> id2entry_; // Maps state IDs to entry 331 vector<I> fp2id_; // Maps entry fingerprints to IDs 332 333 // Compact implementation of the hash table mapping entrys to 334 // state IDs using the hash function 'h_' 335 HashFunc hash_func_; 336 HashEqual hash_equal_; 337 KeyHashSet keys_; 338 const T empty_entry_; 339 const T *current_entry_; 340 341 DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable); 342 }; 343 344 template <class I, class T, class S, class FP, class H> 345 const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1; 346 347 template <class I, class T, class S, class FP, class H> 348 const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2; 349 350 351 // An implementation using a hash map for the entry to ID 352 // mapping. This version permits erasing of s. The entry T 353 // must have == defined and its default constructor must produce a 354 // entry that will never be seen. F is the hash function. 355 template <class I, class T, class F> 356 class ErasableBiTable { 357 public: 358 ErasableBiTable() : first_(0) {} 359 360 I FindId(const T &entry) { 361 I &id_ref = entry2id_[entry]; 362 if (id_ref == 0) { // T not found; store and assign it a new ID. 363 id2entry_.push_back(entry); 364 id_ref = id2entry_.size() + first_; 365 } 366 return id_ref - 1; // NB: id_ref = ID + 1 367 } 368 369 const T &FindEntry(I s) const { return id2entry_[s - first_]; } 370 371 I Size() const { return id2entry_.size(); } 372 373 void Erase(I s) { 374 T &entry = id2entry_[s - first_]; 375 typename unordered_map<T, I, F>::iterator it = 376 entry2id_.find(entry); 377 entry2id_.erase(it); 378 id2entry_[s - first_] = empty_entry_; 379 while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 380 id2entry_.pop_front(); 381 ++first_; 382 } 383 } 384 385 private: 386 unordered_map<T, I, F> entry2id_; 387 deque<T> id2entry_; 388 const T empty_entry_; 389 I first_; // I of first element in the deque; 390 391 DISALLOW_COPY_AND_ASSIGN(ErasableBiTable); 392 }; 393 394 } // namespace fst 395 396 #endif // FST_LIB_BI_TABLE_H__ 397