1 // compact-fst.h 2 3 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // Copyright 2005-2010 Google, Inc. 17 // Author: allauzen (at) google.com (Cyril Allauzen) 18 // 19 // \file 20 // FST Class for memory-efficient representation of common types of 21 // FSTs: linear automata, acceptors, unweighted FSTs, ... 22 23 #ifndef FST_LIB_COMPACT_FST_H__ 24 #define FST_LIB_COMPACT_FST_H__ 25 26 #include <iterator> 27 #include <utility> 28 using std::pair; using std::make_pair; 29 #include <vector> 30 using std::vector; 31 32 #include <fst/cache.h> 33 #include <fst/expanded-fst.h> 34 #include <fst/fst-decl.h> // For optional argument declarations 35 #include <fst/matcher.h> 36 #include <fst/test-properties.h> 37 #include <fst/util.h> 38 39 40 namespace fst { 41 42 struct CompactFstOptions : public CacheOptions { 43 // CompactFst default caching behaviour is to do no caching. Most 44 // compactors are cheap and therefore we save memory by not doing 45 // caching. 46 CompactFstOptions() : CacheOptions(true, 0) {} 47 CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} 48 }; 49 50 // Compactor Interface - class determinies how arcs and final weights 51 // are compacted and expanded. 52 // 53 // Final weights are treated as transitions to the superfinal state, 54 // i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId. 55 // 56 // There are two types of compactors: 57 // 58 // * Fixed out-degree compactors: 'compactor.Size()' returns a 59 // positive integer 's'. An FST can be compacted by this compactor 60 // only if each state has exactly 's' outgoing transitions (counting a 61 // non-Zero() final weight as a transition). A typical example is a 62 // compactor for string FSTs, i.e. 's == 1'. 63 // 64 // * Variable out-degree compactors: 'compactor.Size() == -1'. There 65 // are no out-degree restrictions for these compactors. 66 // 67 // 68 // class Compactor { 69 // public: 70 // // Element is the type of the compacted transitions. 71 // typedef ... Element; 72 // // Return the compacted representation of a transition 'arc' 73 // // at a state 's'. 74 // Element Compact(StateId s, const Arc &arc); 75 // // Return the transition at state 's' represented by the compacted 76 // // transition 'e'. 77 // Arc Expand(StateId s, const Element &e); 78 // // Return -1 for variable out-degree compactors, and the mandatory 79 // // out-degree otherwise. 80 // ssize_t Size(); 81 // // Test whether 'fst' can be compacted by this compactor. 82 // bool Compatible(const Fst<A> &fst); 83 // // Return the properties that are always true for an fst 84 // // compacted using this compactor 85 // uint64 Properties(); 86 // // Return a string identifying the type of compactor. 87 // static const string &Type(); 88 // // Write a compactor to a file. 89 // bool Write(ostream &strm); 90 // // Read a compactor from a file. 91 // static Compactor *Read(istream &strm); 92 // // Default constructor (optional, see comment below). 93 // Compactor(); 94 // }; 95 // 96 // The default constructor is only required for FST_REGISTER to work 97 // (i.e. enabling Convert() and the command-line utilities to work 98 // with this new compactor). However, a default constructor always 99 // needs to be specify for this code to compile, but one can have it 100 // simply raised an error when called: 101 // 102 // Compactor::Compactor() { 103 // FSTERROR() << "Compactor: no default constructor"; 104 // } 105 106 107 // Implementation data for Compact Fst, which can shared between otherwise 108 // independent copies. 109 // 110 // The implementation contains two arrays: 'states_' and 'compacts_'. 111 // 112 // For fixed out-degree compactors, the 'states_' array is unallocated. 113 // The 'compacts_' contains the compacted transitions. Its size is 114 // 'ncompacts_'. The outgoing transitions at a given state are stored 115 // consecutively. For a given state 's', its 'compactor.Size()' outgoing 116 // transitions (including superfinal transition when 's' is final), are 117 // stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). 118 // 119 // For variable out-degree compactors, the states_ array has size 120 // 'nstates_ + 1' and contains pointers to positions into 'compacts_'. 121 // For a given state 's', the compacted transitions of 's' are 122 // stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. 123 // By convention, 'states_[nstates_] == ncompacts_'. 124 // 125 // In both cases, the superfinal transitons (when 's' is final, i.e. 126 // 'Final(s) != Weight::Zero()') is stored first. 127 // 128 // The unsigned type U is used to represent indices into the compacts_ 129 // array. 130 template <class E, class U> 131 class CompactFstData { 132 public: 133 typedef E CompactElement; 134 typedef U Unsigned; 135 136 CompactFstData() 137 : states_(0), 138 compacts_(0), 139 nstates_(0), 140 ncompacts_(0), 141 narcs_(0), 142 start_(kNoStateId), 143 error_(false) {} 144 145 template <class A, class Compactor> 146 CompactFstData(const Fst<A> &fst, const Compactor &compactor); 147 148 template <class Iterator, class Compactor> 149 CompactFstData(const Iterator &begin, const Iterator &end, 150 const Compactor &compactor); 151 152 ~CompactFstData() { 153 delete[] states_; 154 delete[] compacts_; 155 } 156 157 template <class Compactor> 158 static CompactFstData<E, U> *Read(istream &strm, 159 const FstReadOptions &opts, 160 const FstHeader &hdr, 161 const Compactor &compactor); 162 163 bool Write(ostream &strm, const FstWriteOptions &opts) const; 164 165 Unsigned States(ssize_t i) const { return states_[i]; } 166 const CompactElement &Compacts(size_t i) const { return compacts_[i]; } 167 size_t NumStates() const { return nstates_; } 168 size_t NumCompacts() const { return ncompacts_; } 169 size_t NumArcs() const { return narcs_; } 170 ssize_t Start() const { return start_; } 171 172 int RefCount() const { return ref_count_.count(); } 173 int IncrRefCount() { return ref_count_.Incr(); } 174 int DecrRefCount() { return ref_count_.Decr(); } 175 176 bool Error() const { return error_; } 177 178 private: 179 // Byte alignment for states and arcs in file format (version 1 only) 180 static const int kFileAlign = 16; 181 182 Unsigned *states_; 183 CompactElement *compacts_; 184 size_t nstates_; 185 size_t ncompacts_; 186 size_t narcs_; 187 ssize_t start_; 188 RefCounter ref_count_; 189 bool error_; 190 }; 191 192 template <class E, class U> 193 const int CompactFstData<E, U>::kFileAlign; 194 195 196 template <class E, class U> 197 template <class A, class C> 198 CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor) 199 : states_(0), 200 compacts_(0), 201 nstates_(0), 202 ncompacts_(0), 203 narcs_(0), 204 start_(kNoStateId), 205 error_(false) { 206 typedef typename A::StateId StateId; 207 typedef typename A::Weight Weight; 208 start_ = fst.Start(); 209 // Count # of states and arcs. 210 StateId nfinals = 0; 211 for (StateIterator< Fst<A> > siter(fst); 212 !siter.Done(); 213 siter.Next()) { 214 ++nstates_; 215 StateId s = siter.Value(); 216 for (ArcIterator< Fst<A> > aiter(fst, s); 217 !aiter.Done(); 218 aiter.Next()) 219 ++narcs_; 220 if (fst.Final(s) != Weight::Zero()) ++nfinals; 221 } 222 if (compactor.Size() == -1) { 223 states_ = new Unsigned[nstates_ + 1]; 224 ncompacts_ = narcs_ + nfinals; 225 compacts_ = new CompactElement[ncompacts_]; 226 states_[nstates_] = ncompacts_; 227 } else { 228 states_ = 0; 229 ncompacts_ = nstates_ * compactor.Size(); 230 if ((narcs_ + nfinals) != ncompacts_) { 231 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 232 error_ = true; 233 return; 234 } 235 compacts_ = new CompactElement[ncompacts_]; 236 } 237 size_t pos = 0, fpos = 0; 238 for (StateId s = 0; s < nstates_; ++s) { 239 fpos = pos; 240 if (compactor.Size() == -1) 241 states_[s] = pos; 242 if (fst.Final(s) != Weight::Zero()) 243 compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, 244 fst.Final(s), kNoStateId)); 245 for (ArcIterator< Fst<A> > aiter(fst, s); 246 !aiter.Done(); 247 aiter.Next()) { 248 compacts_[pos++] = compactor.Compact(s, aiter.Value()); 249 } 250 if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { 251 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 252 error_ = true; 253 return; 254 } 255 } 256 if (pos != ncompacts_) { 257 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 258 error_ = true; 259 return; 260 } 261 } 262 263 template <class E, class U> 264 template <class Iterator, class C> 265 CompactFstData<E, U>::CompactFstData(const Iterator &begin, 266 const Iterator &end, 267 const C &compactor) 268 : states_(0), 269 compacts_(0), 270 nstates_(0), 271 ncompacts_(0), 272 narcs_(0), 273 start_(kNoStateId), 274 error_(false) { 275 typedef typename C::Arc Arc; 276 typedef typename Arc::Weight Weight; 277 if (compactor.Size() != -1) { 278 ncompacts_ = distance(begin, end); 279 if (compactor.Size() == 1) { 280 // For strings, allow implicit final weight. 281 // Empty input is the empty string. 282 if (ncompacts_ == 0) { 283 ++ncompacts_; 284 } else { 285 Arc arc = compactor.Expand(ncompacts_ - 1, 286 *(begin + (ncompacts_ - 1))); 287 if (arc.ilabel != kNoLabel) 288 ++ncompacts_; 289 } 290 } 291 if (ncompacts_ % compactor.Size()) { 292 FSTERROR() << "CompactFstData: size of input container incompatible" 293 << " with compactor"; 294 error_ = true; 295 return; 296 } 297 if (ncompacts_ == 0) 298 return; 299 start_ = 0; 300 nstates_ = ncompacts_ / compactor.Size(); 301 compacts_ = new CompactElement[ncompacts_]; 302 size_t i = 0; 303 Iterator it = begin; 304 for(; it != end; ++it, ++i){ 305 compacts_[i] = *it; 306 if (compactor.Expand(i, *it).ilabel != kNoLabel) 307 ++narcs_; 308 } 309 if (i < ncompacts_) 310 compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, 311 Weight::One(), kNoStateId)); 312 } else { 313 if (distance(begin, end) == 0) 314 return; 315 // Count # of states, arcs and compacts. 316 Iterator it = begin; 317 for(size_t i = 0; it != end; ++it, ++i) { 318 Arc arc = compactor.Expand(i, *it); 319 if (arc.ilabel != kNoLabel) { 320 ++narcs_; 321 ++ncompacts_; 322 } else { 323 ++nstates_; 324 if (arc.weight != Weight::Zero()) 325 ++ncompacts_; 326 } 327 } 328 start_ = 0; 329 compacts_ = new CompactElement[ncompacts_]; 330 states_ = new Unsigned[nstates_ + 1]; 331 states_[nstates_] = ncompacts_; 332 size_t i = 0, s = 0; 333 for(it = begin; it != end; ++it) { 334 Arc arc = compactor.Expand(i, *it); 335 if (arc.ilabel != kNoLabel) { 336 compacts_[i++] = *it; 337 } else { 338 states_[s++] = i; 339 if (arc.weight != Weight::Zero()) 340 compacts_[i++] = *it; 341 } 342 } 343 if ((s != nstates_) || (i != ncompacts_)) { 344 FSTERROR() << "CompactFstData: ill-formed input container"; 345 error_ = true; 346 return; 347 } 348 } 349 } 350 351 template <class E, class U> 352 template <class C> 353 CompactFstData<E, U> *CompactFstData<E, U>::Read( 354 istream &strm, 355 const FstReadOptions &opts, 356 const FstHeader &hdr, 357 const C &compactor) { 358 CompactFstData<E, U> *data = new CompactFstData<E, U>(); 359 data->start_ = hdr.Start(); 360 data->nstates_ = hdr.NumStates(); 361 data->narcs_ = hdr.NumArcs(); 362 363 if (compactor.Size() == -1) { 364 data->states_ = new Unsigned[data->nstates_ + 1]; 365 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && 366 !AlignInput(strm, kFileAlign)) { 367 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; 368 delete data; 369 return 0; 370 } 371 // TODO: memory map this 372 size_t b = (data->nstates_ + 1) * sizeof(Unsigned); 373 strm.read(reinterpret_cast<char *>(data->states_), b); 374 if (!strm) { 375 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; 376 delete data; 377 return 0; 378 } 379 } else { 380 data->states_ = 0; 381 } 382 data->ncompacts_ = compactor.Size() == -1 383 ? data->states_[data->nstates_] 384 : data->nstates_ * compactor.Size(); 385 data->compacts_ = new CompactElement[data->ncompacts_]; 386 // TODO: memory map this 387 size_t b = data->ncompacts_ * sizeof(CompactElement); 388 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && 389 !AlignInput(strm, kFileAlign)) { 390 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; 391 delete data; 392 return 0; 393 } 394 strm.read(reinterpret_cast<char *>(data->compacts_), b); 395 if (!strm) { 396 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; 397 delete data; 398 return 0; 399 } 400 return data; 401 } 402 403 template<class E, class U> 404 bool CompactFstData<E, U>::Write(ostream &strm, 405 const FstWriteOptions &opts) const { 406 if (states_) { 407 if (opts.align && !AlignOutput(strm, kFileAlign)) { 408 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; 409 return false; 410 } 411 strm.write(reinterpret_cast<char *>(states_), 412 (nstates_ + 1) * sizeof(Unsigned)); 413 } 414 if (opts.align && !AlignOutput(strm, kFileAlign)) { 415 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; 416 return false; 417 } 418 strm.write(reinterpret_cast<char *>(compacts_), 419 ncompacts_ * sizeof(CompactElement)); 420 421 strm.flush(); 422 if (!strm) { 423 LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; 424 return false; 425 } 426 return true; 427 } 428 429 template <class A, class C, class U> class CompactFst; 430 template <class F, class G> void Cast(const F &, G *); 431 432 // Implementation class for CompactFst, which contains CompactFstData 433 // and Fst cache. 434 template <class A, class C, class U> 435 class CompactFstImpl : public CacheImpl<A> { 436 public: 437 using FstImpl<A>::SetType; 438 using FstImpl<A>::SetProperties; 439 using FstImpl<A>::Properties; 440 using FstImpl<A>::SetInputSymbols; 441 using FstImpl<A>::SetOutputSymbols; 442 using FstImpl<A>::WriteHeader; 443 444 using CacheImpl<A>::PushArc; 445 using CacheImpl<A>::HasArcs; 446 using CacheImpl<A>::HasFinal; 447 using CacheImpl<A>::HasStart; 448 using CacheImpl<A>::SetArcs; 449 using CacheImpl<A>::SetFinal; 450 using CacheImpl<A>::SetStart; 451 452 typedef A Arc; 453 typedef typename A::Weight Weight; 454 typedef typename A::StateId StateId; 455 typedef C Compactor; 456 typedef typename C::Element CompactElement; 457 typedef U Unsigned; 458 459 CompactFstImpl() 460 : CacheImpl<A>(CompactFstOptions()), 461 compactor_(0), 462 own_compactor_(false), 463 data_(0) { 464 string type = "compact"; 465 if (sizeof(U) != sizeof(uint32)) { 466 string size; 467 Int64ToStr(8 * sizeof(U), &size); 468 type += size; 469 } 470 type += "_"; 471 type += C::Type(); 472 SetType(type); 473 SetProperties(kNullProperties | kStaticProperties); 474 } 475 476 CompactFstImpl(const Fst<Arc> &fst, const C &compactor, 477 const CompactFstOptions &opts) 478 : CacheImpl<A>(opts), 479 compactor_(new C(compactor)), 480 own_compactor_(true), 481 data_(0) { 482 Init(fst); 483 } 484 485 CompactFstImpl(const Fst<Arc> &fst, C *compactor, 486 const CompactFstOptions &opts) 487 : CacheImpl<A>(opts), 488 compactor_(compactor), 489 own_compactor_(false), 490 data_(0) { 491 Init(fst); 492 } 493 494 template <class Iterator> 495 CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, 496 const CompactFstOptions &opts) 497 : CacheImpl<A>(opts), 498 compactor_(new C(compactor)), 499 own_compactor_(true), 500 data_(0) { 501 Init(b, e); 502 } 503 504 template <class Iterator> 505 CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor, 506 const CompactFstOptions &opts) 507 : CacheImpl<A>(opts), 508 compactor_(compactor), 509 own_compactor_(false), 510 data_(0) { 511 Init(b, e); 512 } 513 514 CompactFstImpl(const CompactFstImpl<A, C, U> &impl) 515 : CacheImpl<A>(impl), 516 compactor_(new C(*impl.compactor_)), 517 own_compactor_(true), 518 data_(impl.data_) { 519 if (data_) 520 data_->IncrRefCount(); 521 SetType(impl.Type()); 522 SetProperties(impl.Properties()); 523 SetInputSymbols(impl.InputSymbols()); 524 SetOutputSymbols(impl.OutputSymbols()); 525 } 526 527 ~CompactFstImpl(){ 528 if (own_compactor_) 529 delete compactor_; 530 if (data_ && !data_->DecrRefCount()) 531 delete data_; 532 } 533 534 StateId Start() { 535 if (!HasStart()) { 536 SetStart(data_->Start()); 537 } 538 return CacheImpl<A>::Start(); 539 } 540 541 Weight Final(StateId s) { 542 if (!HasFinal(s)) { 543 Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); 544 if ((compactor_->Size() != -1) || 545 (data_->States(s) != data_->States(s + 1))) 546 arc = ComputeArc(s, 547 compactor_->Size() == -1 548 ? data_->States(s) 549 : s * compactor_->Size()); 550 SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero()); 551 } 552 return CacheImpl<A>::Final(s); 553 } 554 555 StateId NumStates() const { 556 if (Properties(kError)) return 0; 557 return data_->NumStates(); 558 } 559 560 size_t NumArcs(StateId s) { 561 if (HasArcs(s)) 562 return CacheImpl<A>::NumArcs(s); 563 Unsigned i, num_arcs; 564 if (compactor_->Size() == -1) { 565 i = data_->States(s); 566 num_arcs = data_->States(s + 1) - i; 567 } else { 568 i = s * compactor_->Size(); 569 num_arcs = compactor_->Size(); 570 } 571 if (num_arcs > 0) { 572 const A &arc = ComputeArc(s, i, kArcILabelValue); 573 if (arc.ilabel == kNoStateId) { 574 --num_arcs; 575 } 576 } 577 return num_arcs; 578 } 579 580 size_t NumInputEpsilons(StateId s) { 581 if (!HasArcs(s) && !Properties(kILabelSorted)) 582 Expand(s); 583 if (HasArcs(s)) 584 return CacheImpl<A>::NumInputEpsilons(s); 585 return CountEpsilons(s, false); 586 } 587 588 size_t NumOutputEpsilons(StateId s) { 589 if (!HasArcs(s) && !Properties(kOLabelSorted)) 590 Expand(s); 591 if (HasArcs(s)) 592 return CacheImpl<A>::NumOutputEpsilons(s); 593 return CountEpsilons(s, true); 594 } 595 596 size_t CountEpsilons(StateId s, bool output_epsilons) { 597 size_t begin = compactor_->Size() == -1 ? 598 data_->States(s) : s * compactor_->Size(); 599 size_t end = compactor_->Size() == -1 ? 600 data_->States(s + 1) : (s + 1) * compactor_->Size(); 601 size_t num_eps = 0; 602 for (size_t i = begin; i < end; ++i) { 603 const A &arc = ComputeArc( 604 s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); 605 const typename A::Label &label = 606 (output_epsilons ? arc.olabel : arc.ilabel); 607 if (label == kNoLabel) 608 continue; 609 else if (label > 0) 610 break; 611 ++num_eps; 612 } 613 return num_eps; 614 } 615 616 static CompactFstImpl<A, C, U> *Read(istream &strm, 617 const FstReadOptions &opts) { 618 CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>(); 619 FstHeader hdr; 620 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { 621 delete impl; 622 return 0; 623 } 624 625 // Ensures compatibility 626 if (hdr.Version() == kAlignedFileVersion) 627 hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); 628 629 impl->compactor_ = C::Read(strm); 630 if (!impl->compactor_) { 631 delete impl; 632 return 0; 633 } 634 impl->own_compactor_ = true; 635 impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr, 636 *impl->compactor_); 637 if (!impl->data_) { 638 delete impl; 639 return 0; 640 } 641 return impl; 642 } 643 644 bool Write(ostream &strm, const FstWriteOptions &opts) const { 645 FstHeader hdr; 646 hdr.SetStart(data_->Start()); 647 hdr.SetNumStates(data_->NumStates()); 648 hdr.SetNumArcs(data_->NumArcs()); 649 650 // Ensures compatibility 651 int file_version = opts.align ? kAlignedFileVersion : kFileVersion; 652 WriteHeader(strm, opts, file_version, &hdr); 653 654 compactor_->Write(strm); 655 return data_->Write(strm, opts); 656 } 657 658 // Provide information needed for generic state iterator 659 void InitStateIterator(StateIteratorData<A> *data) const { 660 data->base = 0; 661 data->nstates = data_->NumStates(); 662 } 663 664 void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 665 if (!HasArcs(s)) 666 Expand(s); 667 CacheImpl<A>::InitArcIterator(s, data); 668 } 669 670 Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { 671 return compactor_->Expand(s, data_->Compacts(i), f); 672 } 673 674 void Expand(StateId s) { 675 size_t begin = compactor_->Size() == -1 ? 676 data_->States(s) : s * compactor_->Size(); 677 size_t end = compactor_->Size() == -1 ? 678 data_->States(s + 1) : (s + 1) * compactor_->Size(); 679 for (size_t i = begin; i < end; ++i) { 680 const Arc &arc = ComputeArc(s, i); 681 if (arc.ilabel == kNoLabel) continue; 682 PushArc(s, arc); 683 } 684 SetArcs(s); 685 } 686 687 template <class Iterator> 688 void SetCompactElements(const Iterator &b, const Iterator &e) { 689 if (data_ && !data_->DecrRefCount()) 690 delete data_; 691 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); 692 } 693 694 C *GetCompactor() const { return compactor_; } 695 CompactFstData<CompactElement, U> *Data() const { return data_; } 696 697 protected: 698 template <class B, class D> 699 explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl) 700 : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), 701 compactor_(new C(*impl.GetCompactor())), 702 own_compactor_(true), 703 data_(impl.Data()) { 704 if (data_) 705 data_->IncrRefCount(); 706 SetType(impl.Type()); 707 SetProperties(impl.Properties()); 708 SetInputSymbols(impl.InputSymbols()); 709 SetOutputSymbols(impl.OutputSymbols()); 710 } 711 712 private: 713 void Init(const Fst<Arc> &fst) { 714 string type = "compact"; 715 if (sizeof(U) != sizeof(uint32)) { 716 string size; 717 Int64ToStr(8 * sizeof(U), &size); 718 type += size; 719 } 720 type += "_"; 721 type += compactor_->Type(); 722 SetType(type); 723 SetInputSymbols(fst.InputSymbols()); 724 SetOutputSymbols(fst.OutputSymbols()); 725 data_ = new CompactFstData<CompactElement, U>(fst, *compactor_); 726 if (data_->Error()) 727 SetProperties(kError, kError); 728 uint64 copy_properties = fst.Properties(kCopyProperties, true); 729 if ((copy_properties & kError) || !compactor_->Compatible(fst)) { 730 FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; 731 SetProperties(kError, kError); 732 return; 733 } 734 SetProperties(copy_properties | kStaticProperties); 735 } 736 737 template <class Iterator> 738 void Init(const Iterator &b, const Iterator &e) { 739 string type = "compact"; 740 if (sizeof(U) != sizeof(uint32)) { 741 string size; 742 Int64ToStr(8 * sizeof(U), &size); 743 type += size; 744 } 745 type += "_"; 746 type += compactor_->Type(); 747 SetType(type); 748 SetProperties(kStaticProperties | compactor_->Properties()); 749 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); 750 if (data_->Error()) 751 SetProperties(kError, kError); 752 } 753 754 // Properties always true of this Fst class 755 static const uint64 kStaticProperties = kExpanded; 756 // Current unaligned file format version 757 static const int kFileVersion = 2; 758 // Current aligned file format version 759 static const int kAlignedFileVersion = 1; 760 // Minimum file format version supported 761 static const int kMinFileVersion = 1; 762 763 C *compactor_; 764 bool own_compactor_; 765 CompactFstData<CompactElement, U> *data_; 766 }; 767 768 template <class A, class C, class U> 769 const uint64 CompactFstImpl<A, C, U>::kStaticProperties; 770 template <class A, class C, class U> 771 const int CompactFstImpl<A, C, U>::kFileVersion; 772 template <class A, class C, class U> 773 const int CompactFstImpl<A, C, U>::kAlignedFileVersion; 774 template <class A, class C, class U> 775 const int CompactFstImpl<A, C, U>::kMinFileVersion; 776 777 778 // CompactFst. This class attaches interface to implementation and 779 // handles reference counting, delegating most methods to 780 // ImplToExpandedFst. The unsigned type U is used to represent indices 781 // into the compact arc array (uint32 by default, declared in 782 // fst-decl.h). 783 template <class A, class C, class U> 784 class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > { 785 public: 786 friend class StateIterator< CompactFst<A, C, U> >; 787 friend class ArcIterator< CompactFst<A, C, U> >; 788 template <class F, class G> void friend Cast(const F &, G *); 789 790 typedef A Arc; 791 typedef typename A::StateId StateId; 792 typedef CompactFstImpl<A, C, U> Impl; 793 typedef CacheState<A> State; 794 typedef U Unsigned; 795 796 CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {} 797 798 explicit CompactFst(const Fst<A> &fst, const C &compactor = C(), 799 const CompactFstOptions &opts = CompactFstOptions()) 800 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} 801 802 CompactFst(const Fst<A> &fst, C *compactor, 803 const CompactFstOptions &opts = CompactFstOptions()) 804 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} 805 806 // The following 2 constructors take as input two iterators delimiting 807 // a set of (already) compacted transitions, starting with the 808 // transitions out of the initial state. The format of the input 809 // differs for fixed out-degree and variable out-degree compactors. 810 // 811 // - For fixed out-degree compactors, the final weight (encoded as a 812 // compacted transition) needs to be given only for final 813 // states. All strings (compactor of size 1) will be assume to be 814 // terminated by a final state even when the final state is not 815 // implicitely given. 816 // 817 // - For variable out-degree compactors, the final weight (encoded 818 // as a compacted transition) needs to be given for all states and 819 // must appeared first in the list (for state s, final weight of s, 820 // followed by outgoing transitons in s). 821 // 822 // These 2 constructors allows the direct construction of a CompactFst 823 // without first creating a more memory hungry 'regular' FST. This 824 // is useful when memory usage is severely constrained. 825 template <class Iterator> 826 explicit CompactFst(const Iterator &begin, const Iterator &end, 827 const C &compactor = C(), 828 const CompactFstOptions &opts = CompactFstOptions()) 829 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} 830 831 template <class Iterator> 832 CompactFst(const Iterator &begin, const Iterator &end, 833 C *compactor, const CompactFstOptions &opts = CompactFstOptions()) 834 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} 835 836 // See Fst<>::Copy() for doc. 837 CompactFst(const CompactFst<A, C, U> &fst, bool safe = false) 838 : ImplToExpandedFst<Impl>(fst, safe) {} 839 840 // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. 841 virtual CompactFst<A, C, U> *Copy(bool safe = false) const { 842 return new CompactFst<A, C, U>(*this, safe); 843 } 844 845 // Read a CompactFst from an input stream; return NULL on error 846 static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) { 847 Impl* impl = Impl::Read(strm, opts); 848 return impl ? new CompactFst<A, C, U>(impl) : 0; 849 } 850 851 // Read a CompactFst from a file; return NULL on error 852 // Empty filename reads from standard input 853 static CompactFst<A, C, U> *Read(const string &filename) { 854 Impl* impl = ImplToExpandedFst<Impl>::Read(filename); 855 return impl ? new CompactFst<A, C, U>(impl) : 0; 856 } 857 858 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 859 return GetImpl()->Write(strm, opts); 860 } 861 862 virtual bool Write(const string &filename) const { 863 return Fst<A>::WriteFile(filename); 864 } 865 866 virtual void InitStateIterator(StateIteratorData<A> *data) const { 867 GetImpl()->InitStateIterator(data); 868 } 869 870 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 871 GetImpl()->InitArcIterator(s, data); 872 } 873 874 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { 875 return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type); 876 } 877 878 template <class Iterator> 879 void SetCompactElements(const Iterator &b, const Iterator &e) { 880 GetImpl()->SetCompactElements(b, e); 881 } 882 883 private: 884 CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} 885 886 // Makes visible to friends. 887 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } 888 889 void SetImpl(Impl *impl, bool own_impl = false) { 890 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); 891 } 892 893 void operator=(const CompactFst<A, C, U> &fst); // disallow 894 }; 895 896 897 // Specialization for CompactFst; see generic version in fst.h 898 // for sample usage (but use the CompactFst type!). This version 899 // should inline. 900 template <class A, class C, class U> 901 class StateIterator< CompactFst<A, C, U> > { 902 public: 903 typedef typename A::StateId StateId; 904 905 explicit StateIterator(const CompactFst<A, C, U> &fst) 906 : nstates_(fst.GetImpl()->NumStates()), s_(0) {} 907 908 bool Done() const { return s_ >= nstates_; } 909 910 StateId Value() const { return s_; } 911 912 void Next() { ++s_; } 913 914 void Reset() { s_ = 0; } 915 916 private: 917 StateId nstates_; 918 StateId s_; 919 920 DISALLOW_COPY_AND_ASSIGN(StateIterator); 921 }; 922 923 // Specialization for CompactFst. 924 // Never caches, always iterates over the underlying compact elements. 925 template <class A, class C, class U> 926 class ArcIterator< CompactFst<A, C, U> > { 927 public: 928 typedef typename A::StateId StateId; 929 typedef typename C::Element CompactElement; 930 931 ArcIterator(const CompactFst<A, C, U> &fst, StateId s) 932 : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), 933 pos_(0), flags_(kArcValueFlags) { 934 935 const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data(); 936 size_t offset; 937 if (compactor_->Size() == -1) { // Variable out-degree compactor 938 offset = data->States(s); 939 num_arcs_ = data->States(s + 1) - offset; 940 } else { // Fixed out-degree compactor 941 offset = s * compactor_->Size(); 942 num_arcs_ = compactor_->Size(); 943 } 944 if (num_arcs_ > 0) { 945 compacts_ = &(data->Compacts(offset)); 946 arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); 947 if (arc_.ilabel == kNoStateId) { 948 ++compacts_; 949 --num_arcs_; 950 } 951 } 952 } 953 954 ~ArcIterator() {} 955 956 bool Done() const { return pos_ >= num_arcs_; } 957 958 const A& Value() const { 959 arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); 960 return arc_; 961 } 962 963 void Next() { ++pos_; } 964 965 size_t Position() const { return pos_; } 966 967 void Reset() { pos_ = 0; } 968 969 void Seek(size_t pos) { pos_ = pos; } 970 971 uint32 Flags() const { return flags_; } 972 973 void SetFlags(uint32 f, uint32 m) { 974 flags_ &= ~m; 975 flags_ |= (f & kArcValueFlags); 976 } 977 978 private: 979 C *compactor_; 980 StateId state_; 981 const CompactElement *compacts_; 982 size_t pos_; 983 size_t num_arcs_; 984 mutable A arc_; 985 uint32 flags_; 986 987 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 988 }; 989 990 // // Specialization for CompactFst. 991 // // This is an optionally caching arc iterator. 992 // // TODO(allauzen): implements the kArcValueFlags, the current 993 // // implementation only implements the kArcNoCache flag. 994 // template <class A, class C, class U> 995 // class ArcIterator< CompactFst<A, C, U> > { 996 // public: 997 // typedef typename A::StateId StateId; 998 999 // ArcIterator(const CompactFst<A, C, U> &fst, StateId s) 1000 // : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), 1001 // flags_(kArcValueFlags) { 1002 // cache_data_.ref_count = 0; 1003 1004 // if (fst_.GetImpl()->HasArcs(state_)) { 1005 // fst_.GetImpl()->InitArcIterator(s, &cache_data_); 1006 // num_arcs_ = cache_data_.narcs; 1007 // return; 1008 // } 1009 1010 // const C *compactor = fst_.GetImpl()->GetCompactor(); 1011 // const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data(); 1012 // if (compactor->Size() == -1) { // Variable out-degree compactor 1013 // offset_ = data->States(s); 1014 // num_arcs_ = data->States(s + 1) - offset_; 1015 // } else { // Fixed out-degree compactor 1016 // offset_ = s * compactor->Size(); 1017 // num_arcs_ = compactor->Size(); 1018 // } 1019 // if (num_arcs_ > 0) { 1020 // const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); 1021 // if (arc.ilabel == kNoStateId) { 1022 // ++offset_; 1023 // --num_arcs_; 1024 // } 1025 // } 1026 // } 1027 1028 1029 // ~ArcIterator() { 1030 // if (cache_data_.ref_count) 1031 // --(*cache_data_.ref_count); 1032 // } 1033 1034 // bool Done() const { return pos_ >= num_arcs_; } 1035 1036 // const A& Value() const { 1037 // if (cache_data_.ref_count == 0) { 1038 // if (flags_ & kArcNoCache) { 1039 // arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); 1040 // return arc_; 1041 // } else { 1042 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_); 1043 // } 1044 // } 1045 // return cache_data_.arcs[pos_]; 1046 // } 1047 1048 // void Next() { ++pos_; } 1049 1050 // size_t Position() const { return pos_; } 1051 1052 // void Reset() { pos_ = 0; } 1053 1054 // void Seek(size_t pos) { pos_ = pos; } 1055 1056 // uint32 Flags() const { return flags_; } 1057 1058 // void SetFlags(uint32 f, uint32 m) { 1059 // flags_ &= ~m; 1060 // flags_ |= f; 1061 1062 // if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) 1063 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_); 1064 // } 1065 1066 // private: 1067 // mutable const CompactFst<A, C, U> &fst_; 1068 // StateId state_; 1069 // size_t pos_; 1070 // size_t num_arcs_; 1071 // size_t offset_; 1072 // uint32 flags_; 1073 // mutable A arc_; 1074 // mutable ArcIteratorData<A> cache_data_; 1075 1076 // DISALLOW_COPY_AND_ASSIGN(ArcIterator); 1077 // }; 1078 1079 1080 // 1081 // Utility Compactors 1082 // 1083 1084 // Compactor for unweighted string FSTs 1085 template <class A> 1086 class StringCompactor { 1087 public: 1088 typedef A Arc; 1089 typedef typename A::Label Element; 1090 typedef typename A::Label Label; 1091 typedef typename A::StateId StateId; 1092 typedef typename A::Weight Weight; 1093 1094 Element Compact(StateId s, const A &arc) const { return arc.ilabel; } 1095 1096 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1097 return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); 1098 } 1099 1100 ssize_t Size() const { return 1; } 1101 1102 uint64 Properties() const { 1103 return kString | kAcceptor | kUnweighted; 1104 } 1105 1106 bool Compatible(const Fst<A> &fst) const { 1107 uint64 props = Properties(); 1108 return fst.Properties(props, true) == props; 1109 } 1110 1111 static const string &Type() { 1112 static const string type = "string"; 1113 return type; 1114 } 1115 1116 bool Write(ostream &strm) const { return true; } 1117 1118 static StringCompactor *Read(istream &strm) { 1119 return new StringCompactor; 1120 } 1121 }; 1122 1123 1124 // Compactor for weighted string FSTs 1125 template <class A> 1126 class WeightedStringCompactor { 1127 public: 1128 typedef A Arc; 1129 typedef typename A::Label Label; 1130 typedef typename A::StateId StateId; 1131 typedef typename A::Weight Weight; 1132 typedef pair<Label, Weight> Element; 1133 1134 Element Compact(StateId s, const A &arc) const { 1135 return make_pair(arc.ilabel, arc.weight); 1136 } 1137 1138 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1139 return Arc(p.first, p.first, p.second, 1140 p.first != kNoLabel ? s + 1 : kNoStateId); 1141 } 1142 1143 ssize_t Size() const { return 1;} 1144 1145 uint64 Properties() const { 1146 return kString | kAcceptor; 1147 } 1148 1149 bool Compatible(const Fst<A> &fst) const { 1150 uint64 props = Properties(); 1151 return fst.Properties(props, true) == props; 1152 } 1153 1154 static const string &Type() { 1155 static const string type = "weighted_string"; 1156 return type; 1157 } 1158 1159 bool Write(ostream &strm) const { return true; } 1160 1161 static WeightedStringCompactor *Read(istream &strm) { 1162 return new WeightedStringCompactor; 1163 } 1164 }; 1165 1166 1167 // Compactor for unweighted acceptor FSTs 1168 template <class A> 1169 class UnweightedAcceptorCompactor { 1170 public: 1171 typedef A Arc; 1172 typedef typename A::Label Label; 1173 typedef typename A::StateId StateId; 1174 typedef typename A::Weight Weight; 1175 typedef pair<Label, StateId> Element; 1176 1177 Element Compact(StateId s, const A &arc) const { 1178 return make_pair(arc.ilabel, arc.nextstate); 1179 } 1180 1181 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1182 return Arc(p.first, p.first, Weight::One(), p.second); 1183 } 1184 1185 ssize_t Size() const { return -1;} 1186 1187 uint64 Properties() const { 1188 return kAcceptor | kUnweighted; 1189 } 1190 1191 bool Compatible(const Fst<A> &fst) const { 1192 uint64 props = Properties(); 1193 return fst.Properties(props, true) == props; 1194 } 1195 1196 static const string &Type() { 1197 static const string type = "unweighted_acceptor"; 1198 return type; 1199 } 1200 1201 bool Write(ostream &strm) const { return true; } 1202 1203 static UnweightedAcceptorCompactor *Read(istream &istrm) { 1204 return new UnweightedAcceptorCompactor; 1205 } 1206 }; 1207 1208 1209 // Compactor for weighted acceptor FSTs 1210 template <class A> 1211 class AcceptorCompactor { 1212 public: 1213 typedef A Arc; 1214 typedef typename A::Label Label; 1215 typedef typename A::StateId StateId; 1216 typedef typename A::Weight Weight; 1217 typedef pair< pair<Label, Weight>, StateId > Element; 1218 1219 Element Compact(StateId s, const A &arc) const { 1220 return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); 1221 } 1222 1223 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1224 return Arc(p.first.first, p.first.first, p.first.second, p.second); 1225 } 1226 1227 ssize_t Size() const { return -1;} 1228 1229 uint64 Properties() const { 1230 return kAcceptor; 1231 } 1232 1233 bool Compatible(const Fst<A> &fst) const { 1234 uint64 props = Properties(); 1235 return fst.Properties(props, true) == props; 1236 } 1237 1238 static const string &Type() { 1239 static const string type = "acceptor"; 1240 return type; 1241 } 1242 1243 bool Write(ostream &strm) const { return true; } 1244 1245 static AcceptorCompactor *Read(istream &strm) { 1246 return new AcceptorCompactor; 1247 } 1248 }; 1249 1250 1251 // Compactor for unweighted FSTs 1252 template <class A> 1253 class UnweightedCompactor { 1254 public: 1255 typedef A Arc; 1256 typedef typename A::Label Label; 1257 typedef typename A::StateId StateId; 1258 typedef typename A::Weight Weight; 1259 typedef pair< pair<Label, Label>, StateId > Element; 1260 1261 Element Compact(StateId s, const A &arc) const { 1262 return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); 1263 } 1264 1265 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1266 return Arc(p.first.first, p.first.second, Weight::One(), p.second); 1267 } 1268 1269 ssize_t Size() const { return -1; } 1270 1271 uint64 Properties() const { 1272 return kUnweighted; 1273 } 1274 1275 bool Compatible(const Fst<A> &fst) const { 1276 uint64 props = Properties(); 1277 return fst.Properties(props, true) == props; 1278 } 1279 1280 static const string &Type() { 1281 static const string type = "unweighted"; 1282 return type; 1283 } 1284 1285 bool Write(ostream &strm) const { return true; } 1286 1287 static UnweightedCompactor *Read(istream &strm) { 1288 return new UnweightedCompactor; 1289 } 1290 }; 1291 1292 1293 // Uselful aliases when using StdArc 1294 typedef CompactFst< StdArc, StringCompactor<StdArc> > 1295 StdCompactStringFst; 1296 typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > 1297 StdCompactWeightedStringFst; 1298 typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > 1299 StdCompactAcceptorFst; 1300 typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > 1301 StdCompactUnweightedFst; 1302 typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > 1303 StdCompactUnweightedAcceptorFst; 1304 1305 } // namespace fst 1306 1307 #endif // FST_LIB_COMPACT_FST_H__ 1308