1 // bi-table.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Classes for representing a bijective mapping between an arbitrary entry 20 // of type T and a signed integral ID. 21 22 #ifndef FST_LIB_BI_TABLE_H__ 23 #define FST_LIB_BI_TABLE_H__ 24 25 #include <deque> 26 using std::deque; 27 #include <functional> 28 #include <vector> 29 using std::vector; 30 31 #include <tr1/unordered_set> 32 using std::tr1::unordered_set; 33 using std::tr1::unordered_multiset; 34 35 namespace fst { 36 37 // BI TABLES - these determine a bijective mapping between an 38 // arbitrary entry of type T and an signed integral ID of type I. The IDs are 39 // allocated starting from 0 in order. 40 // 41 // template <class I, class T> 42 // class BiTable { 43 // public: 44 // 45 // // Required constructors. 46 // BiTable(); 47 // 48 // // Lookup integer ID from entry. If it doesn't exist and 'insert' 49 // / is true, then add it. Otherwise return -1. 50 // I FindId(const T &entry, bool insert = true); 51 // // Lookup entry from integer ID. 52 // const T &FindEntry(I) const; 53 // // # of stored entries. 54 // I Size() const; 55 // }; 56 57 // An implementation using a hash map for the entry to ID mapping. 58 // H is the hash function and E is the equality function. 59 // If passed to the constructor, ownership is given to this class. 60 61 template <class I, class T, class H, class E = std::equal_to<T> > 62 class HashBiTable { 63 public: 64 // Reserves space for 'table_size' elements. 65 explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 66 : hash_func_(h), 67 hash_equal_(e), 68 entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { 69 if (table_size) 70 id2entry_.reserve(table_size); 71 } 72 73 HashBiTable(const HashBiTable<I, T, H, E> &table) 74 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 75 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 76 entry2id_(table.entry2id_.begin(), table.entry2id_.end(), 77 table.entry2id_.size(), 78 (hash_func_ ? *hash_func_ : H()), 79 (hash_equal_ ? *hash_equal_ : E())), 80 id2entry_(table.id2entry_) { } 81 82 ~HashBiTable() { 83 delete hash_func_; 84 delete hash_equal_; 85 } 86 87 I FindId(const T &entry, bool insert = true) { 88 I &id_ref = entry2id_[entry]; 89 if (id_ref == 0) { // T not found 90 if (insert) { // store and assign it a new ID 91 id2entry_.push_back(entry); 92 id_ref = id2entry_.size(); 93 } else { 94 return -1; 95 } 96 } 97 return id_ref - 1; // NB: id_ref = ID + 1 98 } 99 100 const T &FindEntry(I s) const { 101 return id2entry_[s]; 102 } 103 104 I Size() const { return id2entry_.size(); } 105 106 private: 107 H *hash_func_; 108 E *hash_equal_; 109 unordered_map<T, I, H, E> entry2id_; 110 vector<T> id2entry_; 111 112 void operator=(const HashBiTable<I, T, H, E> &table); // disallow 113 }; 114 115 116 // Enables alternative hash set representations below. 117 // typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 118 typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 119 120 // Default hash set is STL hash_set 121 template<class K, class H, class E, HSType> 122 struct HashSet : public unordered_set<K, H, E> { 123 HashSet(size_t n = 0, const H &h = H(), const E &e = E()) 124 : unordered_set<K, H, E>(n, h, e) { } 125 126 void rehash(size_t n) { } 127 }; 128 129 130 // An implementation using a hash set for the entry to ID mapping. 131 // The hash set holds 'keys' which are either the ID or kCurrentKey. 132 // These keys can be mapped to entrys either by looking up in the 133 // entry vector or, if kCurrentKey, in current_entry_ member. The hash 134 // and key equality functions map to entries first. H 135 // is the hash function and E is the equality function. If passed to 136 // the constructor, ownership is given to this class. 137 template <class I, class T, class H, 138 class E = std::equal_to<T>, HSType HS = HS_DENSE> 139 class CompactHashBiTable { 140 public: 141 friend class HashFunc; 142 friend class HashEqual; 143 144 // Reserves space for 'table_size' elements. 145 explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 146 : hash_func_(h), 147 hash_equal_(e), 148 compact_hash_func_(*this), 149 compact_hash_equal_(*this), 150 keys_(table_size, compact_hash_func_, compact_hash_equal_) { 151 if (table_size) 152 id2entry_.reserve(table_size); 153 } 154 155 CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table) 156 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 157 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 158 compact_hash_func_(*this), 159 compact_hash_equal_(*this), 160 keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), 161 id2entry_(table.id2entry_) { 162 keys_.insert(table.keys_.begin(), table.keys_.end()); 163 } 164 165 ~CompactHashBiTable() { 166 delete hash_func_; 167 delete hash_equal_; 168 } 169 170 I FindId(const T &entry, bool insert = true) { 171 current_entry_ = &entry; 172 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 173 if (it == keys_.end()) { // T not found 174 if (insert) { // store and assign it a new ID 175 I key = id2entry_.size(); 176 id2entry_.push_back(entry); 177 keys_.insert(key); 178 return key; 179 } else { 180 return -1; 181 } 182 } else { 183 return *it; 184 } 185 } 186 187 const T &FindEntry(I s) const { return id2entry_[s]; } 188 189 I Size() const { return id2entry_.size(); } 190 191 // Clear content. With argument, erases last n IDs. 192 void Clear(ssize_t n = -1) { 193 if (n < 0 || n > id2entry_.size()) 194 n = id2entry_.size(); 195 while (n-- > 0) { 196 I key = id2entry_.size() - 1; 197 keys_.erase(key); 198 id2entry_.pop_back(); 199 } 200 keys_.rehash(0); 201 } 202 203 private: 204 static const I kCurrentKey; // -1 205 static const I kEmptyKey; // -2 206 static const I kDeletedKey; // -3 207 208 class HashFunc { 209 public: 210 HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 211 212 size_t operator()(I k) const { 213 if (k >= kCurrentKey) { 214 return (*ht_->hash_func_)(ht_->Key2Entry(k)); 215 } else { 216 return 0; 217 } 218 } 219 220 private: 221 const CompactHashBiTable *ht_; 222 }; 223 224 class HashEqual { 225 public: 226 HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 227 228 bool operator()(I k1, I k2) const { 229 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 230 return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); 231 } else { 232 return k1 == k2; 233 } 234 } 235 private: 236 const CompactHashBiTable *ht_; 237 }; 238 239 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 240 241 const T &Key2Entry(I k) const { 242 if (k == kCurrentKey) 243 return *current_entry_; 244 else 245 return id2entry_[k]; 246 } 247 248 H *hash_func_; 249 E *hash_equal_; 250 HashFunc compact_hash_func_; 251 HashEqual compact_hash_equal_; 252 KeyHashSet keys_; 253 vector<T> id2entry_; 254 const T *current_entry_; 255 256 void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow 257 }; 258 259 260 template <class I, class T, class H, class E, HSType HS> 261 const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1; 262 263 template <class I, class T, class H, class E, HSType HS> 264 const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2; 265 266 template <class I, class T, class H, class E, HSType HS> 267 const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3; 268 269 270 // An implementation using a vector for the entry to ID mapping. 271 // It is passed a function object FP that should fingerprint entries 272 // uniquely to an integer that can used as a vector index. Normally, 273 // VectorBiTable constructs the FP object. The user can instead 274 // pass in this object; in that case, VectorBiTable takes its 275 // ownership. 276 template <class I, class T, class FP> 277 class VectorBiTable { 278 public: 279 // Reserves space for 'table_size' elements. 280 explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) 281 : fp_(fp ? fp : new FP()) { 282 if (table_size) 283 id2entry_.reserve(table_size); 284 } 285 286 VectorBiTable(const VectorBiTable<I, T, FP> &table) 287 : fp_(table.fp_ ? new FP(*table.fp_) : 0), 288 fp2id_(table.fp2id_), 289 id2entry_(table.id2entry_) { } 290 291 ~VectorBiTable() { delete fp_; } 292 293 I FindId(const T &entry, bool insert = true) { 294 ssize_t fp = (*fp_)(entry); 295 if (fp >= fp2id_.size()) 296 fp2id_.resize(fp + 1); 297 I &id_ref = fp2id_[fp]; 298 if (id_ref == 0) { // T not found 299 if (insert) { // store and assign it a new ID 300 id2entry_.push_back(entry); 301 id_ref = id2entry_.size(); 302 } else { 303 return -1; 304 } 305 } 306 return id_ref - 1; // NB: id_ref = ID + 1 307 } 308 309 const T &FindEntry(I s) const { return id2entry_[s]; } 310 311 I Size() const { return id2entry_.size(); } 312 313 const FP &Fingerprint() const { return *fp_; } 314 315 private: 316 FP *fp_; 317 vector<I> fp2id_; 318 vector<T> id2entry_; 319 320 void operator=(const VectorBiTable<I, T, FP> &table); // disallow 321 }; 322 323 324 // An implementation using a vector and a compact hash table. The 325 // selecting functor S returns true for entries to be hashed in the 326 // vector. The fingerprinting functor FP returns a unique fingerprint 327 // for each entry to be hashed in the vector (these need to be 328 // suitable for indexing in a vector). The hash functor H is used 329 // when hashing entry into the compact hash table. If passed to the 330 // constructor, ownership is given to this class. 331 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE> 332 class VectorHashBiTable { 333 public: 334 friend class HashFunc; 335 friend class HashEqual; 336 337 explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, 338 size_t vector_size = 0, 339 size_t entry_size = 0) 340 : selector_(s), 341 fp_(fp ? fp : new FP()), 342 h_(h ? h : new H()), 343 hash_func_(*this), 344 hash_equal_(*this), 345 keys_(0, hash_func_, hash_equal_) { 346 if (vector_size) 347 fp2id_.reserve(vector_size); 348 if (entry_size) 349 id2entry_.reserve(entry_size); 350 } 351 352 VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table) 353 : selector_(new S(table.s_)), 354 fp_(table.fp_ ? new FP(*table.fp_) : 0), 355 h_(table.h_ ? new H(*table.h_) : 0), 356 id2entry_(table.id2entry_), 357 fp2id_(table.fp2id_), 358 hash_func_(*this), 359 hash_equal_(*this), 360 keys_(table.keys_.size(), hash_func_, hash_equal_) { 361 keys_.insert(table.keys_.begin(), table.keys_.end()); 362 } 363 364 ~VectorHashBiTable() { 365 delete selector_; 366 delete fp_; 367 delete h_; 368 } 369 370 I FindId(const T &entry, bool insert = true) { 371 if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 372 uint64 fp = (*fp_)(entry); 373 if (fp2id_.size() <= fp) 374 fp2id_.resize(fp + 1, 0); 375 if (fp2id_[fp] == 0) { // T not found 376 if (insert) { // store and assign it a new ID 377 id2entry_.push_back(entry); 378 fp2id_[fp] = id2entry_.size(); 379 } else { 380 return -1; 381 } 382 } 383 return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 384 } else { // Use the hash table otherwise. 385 current_entry_ = &entry; 386 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 387 if (it == keys_.end()) { 388 if (insert) { 389 I key = id2entry_.size(); 390 id2entry_.push_back(entry); 391 keys_.insert(key); 392 return key; 393 } else { 394 return -1; 395 } 396 } else { 397 return *it; 398 } 399 } 400 } 401 402 const T &FindEntry(I s) const { 403 return id2entry_[s]; 404 } 405 406 I Size() const { return id2entry_.size(); } 407 408 const S &Selector() const { return *selector_; } 409 410 const FP &Fingerprint() const { return *fp_; } 411 412 const H &Hash() const { return *h_; } 413 414 private: 415 static const I kCurrentKey; // -1 416 static const I kEmptyKey; // -2 417 418 class HashFunc { 419 public: 420 HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 421 422 size_t operator()(I k) const { 423 if (k >= kCurrentKey) { 424 return (*(ht_->h_))(ht_->Key2Entry(k)); 425 } else { 426 return 0; 427 } 428 } 429 private: 430 const VectorHashBiTable *ht_; 431 }; 432 433 class HashEqual { 434 public: 435 HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 436 437 bool operator()(I k1, I k2) const { 438 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 439 return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 440 } else { 441 return k1 == k2; 442 } 443 } 444 private: 445 const VectorHashBiTable *ht_; 446 }; 447 448 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 449 450 const T &Key2Entry(I k) const { 451 if (k == kCurrentKey) 452 return *current_entry_; 453 else 454 return id2entry_[k]; 455 } 456 457 S *selector_; // Returns true if entry hashed into vector 458 FP *fp_; // Fingerprint used when hashing entry into vector 459 H *h_; // Hash function used when hashing entry into hash_set 460 461 vector<T> id2entry_; // Maps state IDs to entry 462 vector<I> fp2id_; // Maps entry fingerprints to IDs 463 464 // Compact implementation of the hash table mapping entrys to 465 // state IDs using the hash function 'h_' 466 HashFunc hash_func_; 467 HashEqual hash_equal_; 468 KeyHashSet keys_; 469 const T *current_entry_; 470 471 // disallow 472 void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table); 473 }; 474 475 template <class I, class T, class S, class FP, class H, HSType HS> 476 const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1; 477 478 template <class I, class T, class S, class FP, class H, HSType HS> 479 const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3; 480 481 482 // An implementation using a hash map for the entry to ID 483 // mapping. This version permits erasing of arbitrary states. The 484 // entry T must have == defined and its default constructor must 485 // produce a entry that will never be seen. F is the hash function. 486 template <class I, class T, class F> 487 class ErasableBiTable { 488 public: 489 ErasableBiTable() : first_(0) {} 490 491 I FindId(const T &entry, bool insert = true) { 492 I &id_ref = entry2id_[entry]; 493 if (id_ref == 0) { // T not found 494 if (insert) { // store and assign it a new ID 495 id2entry_.push_back(entry); 496 id_ref = id2entry_.size() + first_; 497 } else { 498 return -1; 499 } 500 } 501 return id_ref - 1; // NB: id_ref = ID + 1 502 } 503 504 const T &FindEntry(I s) const { return id2entry_[s - first_]; } 505 506 I Size() const { return id2entry_.size(); } 507 508 void Erase(I s) { 509 T &entry = id2entry_[s - first_]; 510 typename unordered_map<T, I, F>::iterator it = 511 entry2id_.find(entry); 512 entry2id_.erase(it); 513 id2entry_[s - first_] = empty_entry_; 514 while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 515 id2entry_.pop_front(); 516 ++first_; 517 } 518 } 519 520 private: 521 unordered_map<T, I, F> entry2id_; 522 deque<T> id2entry_; 523 const T empty_entry_; 524 I first_; // I of first element in the deque; 525 526 // disallow 527 void operator=(const ErasableBiTable<I, T, F> &table); //disallow 528 }; 529 530 } // namespace fst 531 532 #endif // FST_LIB_BI_TABLE_H__ 533