1 // compact-fst.h 2 3 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // Copyright 2005-2010 Google, Inc. 17 // Author: allauzen (at) google.com (Cyril Allauzen) 18 // 19 // \file 20 // FST Class for memory-efficient representation of common types of 21 // FSTs: linear automata, acceptors, unweighted FSTs, ... 22 23 #ifndef FST_LIB_COMPACT_FST_H__ 24 #define FST_LIB_COMPACT_FST_H__ 25 26 #include <iterator> 27 #include <utility> 28 using std::pair; using std::make_pair; 29 #include <vector> 30 using std::vector; 31 32 #include <fst/cache.h> 33 #include <fst/expanded-fst.h> 34 #include <fst/fst-decl.h> // For optional argument declarations 35 #include <fst/mapped-file.h> 36 #include <fst/matcher.h> 37 #include <fst/test-properties.h> 38 #include <fst/util.h> 39 40 41 namespace fst { 42 43 struct CompactFstOptions : public CacheOptions { 44 // CompactFst default caching behaviour is to do no caching. Most 45 // compactors are cheap and therefore we save memory by not doing 46 // caching. 47 CompactFstOptions() : CacheOptions(true, 0) {} 48 CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} 49 }; 50 51 // Compactor Interface - class determinies how arcs and final weights 52 // are compacted and expanded. 53 // 54 // Final weights are treated as transitions to the superfinal state, 55 // i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId. 56 // 57 // There are two types of compactors: 58 // 59 // * Fixed out-degree compactors: 'compactor.Size()' returns a 60 // positive integer 's'. An FST can be compacted by this compactor 61 // only if each state has exactly 's' outgoing transitions (counting a 62 // non-Zero() final weight as a transition). A typical example is a 63 // compactor for string FSTs, i.e. 's == 1'. 64 // 65 // * Variable out-degree compactors: 'compactor.Size() == -1'. There 66 // are no out-degree restrictions for these compactors. 67 // 68 // 69 // class Compactor { 70 // public: 71 // // Element is the type of the compacted transitions. 72 // typedef ... Element; 73 // // Return the compacted representation of a transition 'arc' 74 // // at a state 's'. 75 // Element Compact(StateId s, const Arc &arc); 76 // // Return the transition at state 's' represented by the compacted 77 // // transition 'e'. 78 // Arc Expand(StateId s, const Element &e); 79 // // Return -1 for variable out-degree compactors, and the mandatory 80 // // out-degree otherwise. 81 // ssize_t Size(); 82 // // Test whether 'fst' can be compacted by this compactor. 83 // bool Compatible(const Fst<A> &fst); 84 // // Return the properties that are always true for an fst 85 // // compacted using this compactor 86 // uint64 Properties(); 87 // // Return a string identifying the type of compactor. 88 // static const string &Type(); 89 // // Write a compactor to a file. 90 // bool Write(ostream &strm); 91 // // Read a compactor from a file. 92 // static Compactor *Read(istream &strm); 93 // // Default constructor (optional, see comment below). 94 // Compactor(); 95 // }; 96 // 97 // The default constructor is only required for FST_REGISTER to work 98 // (i.e. enabling Convert() and the command-line utilities to work 99 // with this new compactor). However, a default constructor always 100 // needs to be specify for this code to compile, but one can have it 101 // simply raised an error when called: 102 // 103 // Compactor::Compactor() { 104 // FSTERROR() << "Compactor: no default constructor"; 105 // } 106 107 108 // Implementation data for Compact Fst, which can shared between otherwise 109 // independent copies. 110 // 111 // The implementation contains two arrays: 'states_' and 'compacts_'. 112 // 113 // For fixed out-degree compactors, the 'states_' array is unallocated. 114 // The 'compacts_' contains the compacted transitions. Its size is 115 // 'ncompacts_'. The outgoing transitions at a given state are stored 116 // consecutively. For a given state 's', its 'compactor.Size()' outgoing 117 // transitions (including superfinal transition when 's' is final), are 118 // stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). 119 // 120 // For variable out-degree compactors, the states_ array has size 121 // 'nstates_ + 1' and contains pointers to positions into 'compacts_'. 122 // For a given state 's', the compacted transitions of 's' are 123 // stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. 124 // By convention, 'states_[nstates_] == ncompacts_'. 125 // 126 // In both cases, the superfinal transitons (when 's' is final, i.e. 127 // 'Final(s) != Weight::Zero()') is stored first. 128 // 129 // The unsigned type U is used to represent indices into the compacts_ 130 // array. 131 template <class E, class U> 132 class CompactFstData { 133 public: 134 typedef E CompactElement; 135 typedef U Unsigned; 136 137 CompactFstData() 138 : states_region_(0), 139 compacts_region_(0), 140 states_(0), 141 compacts_(0), 142 nstates_(0), 143 ncompacts_(0), 144 narcs_(0), 145 start_(kNoStateId), 146 error_(false) {} 147 148 template <class A, class Compactor> 149 CompactFstData(const Fst<A> &fst, const Compactor &compactor); 150 151 template <class Iterator, class Compactor> 152 CompactFstData(const Iterator &begin, const Iterator &end, 153 const Compactor &compactor); 154 155 ~CompactFstData() { 156 if (states_region_ == NULL) { 157 delete [] states_; 158 } 159 delete states_region_; 160 if (compacts_region_ == NULL) { 161 delete [] compacts_; 162 } 163 delete compacts_region_; 164 } 165 166 template <class Compactor> 167 static CompactFstData<E, U> *Read(istream &strm, 168 const FstReadOptions &opts, 169 const FstHeader &hdr, 170 const Compactor &compactor); 171 172 bool Write(ostream &strm, const FstWriteOptions &opts) const; 173 174 Unsigned States(ssize_t i) const { return states_[i]; } 175 const CompactElement &Compacts(size_t i) const { return compacts_[i]; } 176 size_t NumStates() const { return nstates_; } 177 size_t NumCompacts() const { return ncompacts_; } 178 size_t NumArcs() const { return narcs_; } 179 ssize_t Start() const { return start_; } 180 181 int RefCount() const { return ref_count_.count(); } 182 int IncrRefCount() { return ref_count_.Incr(); } 183 int DecrRefCount() { return ref_count_.Decr(); } 184 185 bool Error() const { return error_; } 186 187 private: 188 MappedFile *states_region_; 189 MappedFile *compacts_region_; 190 Unsigned *states_; 191 CompactElement *compacts_; 192 size_t nstates_; 193 size_t ncompacts_; 194 size_t narcs_; 195 ssize_t start_; 196 RefCounter ref_count_; 197 bool error_; 198 }; 199 200 template <class E, class U> 201 template <class A, class C> 202 CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor) 203 : states_region_(0), 204 compacts_region_(0), 205 states_(0), 206 compacts_(0), 207 nstates_(0), 208 ncompacts_(0), 209 narcs_(0), 210 start_(kNoStateId), 211 error_(false) { 212 typedef typename A::StateId StateId; 213 typedef typename A::Weight Weight; 214 start_ = fst.Start(); 215 // Count # of states and arcs. 216 StateId nfinals = 0; 217 for (StateIterator< Fst<A> > siter(fst); 218 !siter.Done(); 219 siter.Next()) { 220 ++nstates_; 221 StateId s = siter.Value(); 222 for (ArcIterator< Fst<A> > aiter(fst, s); 223 !aiter.Done(); 224 aiter.Next()) 225 ++narcs_; 226 if (fst.Final(s) != Weight::Zero()) ++nfinals; 227 } 228 if (compactor.Size() == -1) { 229 states_ = new Unsigned[nstates_ + 1]; 230 ncompacts_ = narcs_ + nfinals; 231 compacts_ = new CompactElement[ncompacts_]; 232 states_[nstates_] = ncompacts_; 233 } else { 234 states_ = 0; 235 ncompacts_ = nstates_ * compactor.Size(); 236 if ((narcs_ + nfinals) != ncompacts_) { 237 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 238 error_ = true; 239 return; 240 } 241 compacts_ = new CompactElement[ncompacts_]; 242 } 243 size_t pos = 0, fpos = 0; 244 for (StateId s = 0; s < nstates_; ++s) { 245 fpos = pos; 246 if (compactor.Size() == -1) 247 states_[s] = pos; 248 if (fst.Final(s) != Weight::Zero()) 249 compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, 250 fst.Final(s), kNoStateId)); 251 for (ArcIterator< Fst<A> > aiter(fst, s); 252 !aiter.Done(); 253 aiter.Next()) { 254 compacts_[pos++] = compactor.Compact(s, aiter.Value()); 255 } 256 if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { 257 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 258 error_ = true; 259 return; 260 } 261 } 262 if (pos != ncompacts_) { 263 FSTERROR() << "CompactFstData: compactor incompatible with fst"; 264 error_ = true; 265 return; 266 } 267 } 268 269 template <class E, class U> 270 template <class Iterator, class C> 271 CompactFstData<E, U>::CompactFstData(const Iterator &begin, 272 const Iterator &end, 273 const C &compactor) 274 : states_region_(0), 275 compacts_region_(0), 276 states_(0), 277 compacts_(0), 278 nstates_(0), 279 ncompacts_(0), 280 narcs_(0), 281 start_(kNoStateId), 282 error_(false) { 283 typedef typename C::Arc Arc; 284 typedef typename Arc::Weight Weight; 285 if (compactor.Size() != -1) { 286 ncompacts_ = distance(begin, end); 287 if (compactor.Size() == 1) { 288 // For strings, allow implicit final weight. 289 // Empty input is the empty string. 290 if (ncompacts_ == 0) { 291 ++ncompacts_; 292 } else { 293 Arc arc = compactor.Expand(ncompacts_ - 1, 294 *(begin + (ncompacts_ - 1))); 295 if (arc.ilabel != kNoLabel) 296 ++ncompacts_; 297 } 298 } 299 if (ncompacts_ % compactor.Size()) { 300 FSTERROR() << "CompactFstData: size of input container incompatible" 301 << " with compactor"; 302 error_ = true; 303 return; 304 } 305 if (ncompacts_ == 0) 306 return; 307 start_ = 0; 308 nstates_ = ncompacts_ / compactor.Size(); 309 compacts_ = new CompactElement[ncompacts_]; 310 size_t i = 0; 311 Iterator it = begin; 312 for(; it != end; ++it, ++i){ 313 compacts_[i] = *it; 314 if (compactor.Expand(i, *it).ilabel != kNoLabel) 315 ++narcs_; 316 } 317 if (i < ncompacts_) 318 compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, 319 Weight::One(), kNoStateId)); 320 } else { 321 if (distance(begin, end) == 0) 322 return; 323 // Count # of states, arcs and compacts. 324 Iterator it = begin; 325 for(size_t i = 0; it != end; ++it, ++i) { 326 Arc arc = compactor.Expand(i, *it); 327 if (arc.ilabel != kNoLabel) { 328 ++narcs_; 329 ++ncompacts_; 330 } else { 331 ++nstates_; 332 if (arc.weight != Weight::Zero()) 333 ++ncompacts_; 334 } 335 } 336 start_ = 0; 337 compacts_ = new CompactElement[ncompacts_]; 338 states_ = new Unsigned[nstates_ + 1]; 339 states_[nstates_] = ncompacts_; 340 size_t i = 0, s = 0; 341 for(it = begin; it != end; ++it) { 342 Arc arc = compactor.Expand(i, *it); 343 if (arc.ilabel != kNoLabel) { 344 compacts_[i++] = *it; 345 } else { 346 states_[s++] = i; 347 if (arc.weight != Weight::Zero()) 348 compacts_[i++] = *it; 349 } 350 } 351 if ((s != nstates_) || (i != ncompacts_)) { 352 FSTERROR() << "CompactFstData: ill-formed input container"; 353 error_ = true; 354 return; 355 } 356 } 357 } 358 359 template <class E, class U> 360 template <class C> 361 CompactFstData<E, U> *CompactFstData<E, U>::Read( 362 istream &strm, 363 const FstReadOptions &opts, 364 const FstHeader &hdr, 365 const C &compactor) { 366 CompactFstData<E, U> *data = new CompactFstData<E, U>(); 367 data->start_ = hdr.Start(); 368 data->nstates_ = hdr.NumStates(); 369 data->narcs_ = hdr.NumArcs(); 370 371 if (compactor.Size() == -1) { 372 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { 373 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; 374 delete data; 375 return 0; 376 } 377 size_t b = (data->nstates_ + 1) * sizeof(Unsigned); 378 data->states_region_ = MappedFile::Map(&strm, opts, b); 379 if (!strm || data->states_region_ == NULL) { 380 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; 381 delete data; 382 return 0; 383 } 384 data->states_ = static_cast<Unsigned *>( 385 data->states_region_->mutable_data()); 386 } else { 387 data->states_ = 0; 388 } 389 data->ncompacts_ = compactor.Size() == -1 390 ? data->states_[data->nstates_] 391 : data->nstates_ * compactor.Size(); 392 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { 393 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; 394 delete data; 395 return 0; 396 } 397 size_t b = data->ncompacts_ * sizeof(CompactElement); 398 data->compacts_region_ = MappedFile::Map(&strm, opts, b); 399 if (!strm || data->compacts_region_ == NULL) { 400 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; 401 delete data; 402 return 0; 403 } 404 data->compacts_ = static_cast<CompactElement *>( 405 data->compacts_region_->mutable_data()); 406 return data; 407 } 408 409 template<class E, class U> 410 bool CompactFstData<E, U>::Write(ostream &strm, 411 const FstWriteOptions &opts) const { 412 if (states_) { 413 if (opts.align && !AlignOutput(strm)) { 414 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; 415 return false; 416 } 417 strm.write(reinterpret_cast<char *>(states_), 418 (nstates_ + 1) * sizeof(Unsigned)); 419 } 420 if (opts.align && !AlignOutput(strm)) { 421 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; 422 return false; 423 } 424 strm.write(reinterpret_cast<char *>(compacts_), 425 ncompacts_ * sizeof(CompactElement)); 426 427 strm.flush(); 428 if (!strm) { 429 LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; 430 return false; 431 } 432 return true; 433 } 434 435 template <class A, class C, class U> class CompactFst; 436 template <class F, class G> void Cast(const F &, G *); 437 438 // Implementation class for CompactFst, which contains CompactFstData 439 // and Fst cache. 440 template <class A, class C, class U> 441 class CompactFstImpl : public CacheImpl<A> { 442 public: 443 using FstImpl<A>::SetType; 444 using FstImpl<A>::SetProperties; 445 using FstImpl<A>::Properties; 446 using FstImpl<A>::SetInputSymbols; 447 using FstImpl<A>::SetOutputSymbols; 448 using FstImpl<A>::WriteHeader; 449 450 using CacheImpl<A>::PushArc; 451 using CacheImpl<A>::HasArcs; 452 using CacheImpl<A>::HasFinal; 453 using CacheImpl<A>::HasStart; 454 using CacheImpl<A>::SetArcs; 455 using CacheImpl<A>::SetFinal; 456 using CacheImpl<A>::SetStart; 457 458 typedef A Arc; 459 typedef typename A::Weight Weight; 460 typedef typename A::StateId StateId; 461 typedef C Compactor; 462 typedef typename C::Element CompactElement; 463 typedef U Unsigned; 464 465 CompactFstImpl() 466 : CacheImpl<A>(CompactFstOptions()), 467 compactor_(0), 468 own_compactor_(false), 469 data_(0) { 470 string type = "compact"; 471 if (sizeof(U) != sizeof(uint32)) { 472 string size; 473 Int64ToStr(8 * sizeof(U), &size); 474 type += size; 475 } 476 type += "_"; 477 type += C::Type(); 478 SetType(type); 479 SetProperties(kNullProperties | kStaticProperties); 480 } 481 482 CompactFstImpl(const Fst<Arc> &fst, const C &compactor, 483 const CompactFstOptions &opts) 484 : CacheImpl<A>(opts), 485 compactor_(new C(compactor)), 486 own_compactor_(true), 487 data_(0) { 488 Init(fst); 489 } 490 491 CompactFstImpl(const Fst<Arc> &fst, C *compactor, 492 const CompactFstOptions &opts) 493 : CacheImpl<A>(opts), 494 compactor_(compactor), 495 own_compactor_(false), 496 data_(0) { 497 Init(fst); 498 } 499 500 template <class Iterator> 501 CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, 502 const CompactFstOptions &opts) 503 : CacheImpl<A>(opts), 504 compactor_(new C(compactor)), 505 own_compactor_(true), 506 data_(0) { 507 Init(b, e); 508 } 509 510 template <class Iterator> 511 CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor, 512 const CompactFstOptions &opts) 513 : CacheImpl<A>(opts), 514 compactor_(compactor), 515 own_compactor_(false), 516 data_(0) { 517 Init(b, e); 518 } 519 520 CompactFstImpl(const CompactFstImpl<A, C, U> &impl) 521 : CacheImpl<A>(impl), 522 compactor_(new C(*impl.compactor_)), 523 own_compactor_(true), 524 data_(impl.data_) { 525 if (data_) 526 data_->IncrRefCount(); 527 SetType(impl.Type()); 528 SetProperties(impl.Properties()); 529 SetInputSymbols(impl.InputSymbols()); 530 SetOutputSymbols(impl.OutputSymbols()); 531 } 532 533 ~CompactFstImpl(){ 534 if (own_compactor_) 535 delete compactor_; 536 if (data_ && !data_->DecrRefCount()) 537 delete data_; 538 } 539 540 StateId Start() { 541 if (!HasStart()) { 542 SetStart(data_->Start()); 543 } 544 return CacheImpl<A>::Start(); 545 } 546 547 Weight Final(StateId s) { 548 if (HasFinal(s)) 549 return CacheImpl<A>::Final(s); 550 Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); 551 if ((compactor_->Size() != -1) || 552 (data_->States(s) != data_->States(s + 1))) 553 arc = ComputeArc(s, 554 compactor_->Size() == -1 555 ? data_->States(s) 556 : s * compactor_->Size()); 557 return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero(); 558 } 559 560 StateId NumStates() const { 561 if (Properties(kError)) return 0; 562 return data_->NumStates(); 563 } 564 565 size_t NumArcs(StateId s) { 566 if (HasArcs(s)) 567 return CacheImpl<A>::NumArcs(s); 568 Unsigned i, num_arcs; 569 if (compactor_->Size() == -1) { 570 i = data_->States(s); 571 num_arcs = data_->States(s + 1) - i; 572 } else { 573 i = s * compactor_->Size(); 574 num_arcs = compactor_->Size(); 575 } 576 if (num_arcs > 0) { 577 const A &arc = ComputeArc(s, i, kArcILabelValue); 578 if (arc.ilabel == kNoStateId) { 579 --num_arcs; 580 } 581 } 582 return num_arcs; 583 } 584 585 size_t NumInputEpsilons(StateId s) { 586 if (!HasArcs(s) && !Properties(kILabelSorted)) 587 Expand(s); 588 if (HasArcs(s)) 589 return CacheImpl<A>::NumInputEpsilons(s); 590 return CountEpsilons(s, false); 591 } 592 593 size_t NumOutputEpsilons(StateId s) { 594 if (!HasArcs(s) && !Properties(kOLabelSorted)) 595 Expand(s); 596 if (HasArcs(s)) 597 return CacheImpl<A>::NumOutputEpsilons(s); 598 return CountEpsilons(s, true); 599 } 600 601 size_t CountEpsilons(StateId s, bool output_epsilons) { 602 size_t begin = compactor_->Size() == -1 ? 603 data_->States(s) : s * compactor_->Size(); 604 size_t end = compactor_->Size() == -1 ? 605 data_->States(s + 1) : (s + 1) * compactor_->Size(); 606 size_t num_eps = 0; 607 for (size_t i = begin; i < end; ++i) { 608 const A &arc = ComputeArc( 609 s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); 610 const typename A::Label &label = 611 (output_epsilons ? arc.olabel : arc.ilabel); 612 if (label == kNoLabel) 613 continue; 614 else if (label > 0) 615 break; 616 ++num_eps; 617 } 618 return num_eps; 619 } 620 621 static CompactFstImpl<A, C, U> *Read(istream &strm, 622 const FstReadOptions &opts) { 623 CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>(); 624 FstHeader hdr; 625 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { 626 delete impl; 627 return 0; 628 } 629 630 // Ensures compatibility 631 if (hdr.Version() == kAlignedFileVersion) 632 hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); 633 634 impl->compactor_ = C::Read(strm); 635 if (!impl->compactor_) { 636 delete impl; 637 return 0; 638 } 639 impl->own_compactor_ = true; 640 impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr, 641 *impl->compactor_); 642 if (!impl->data_) { 643 delete impl; 644 return 0; 645 } 646 return impl; 647 } 648 649 bool Write(ostream &strm, const FstWriteOptions &opts) const { 650 FstHeader hdr; 651 hdr.SetStart(data_->Start()); 652 hdr.SetNumStates(data_->NumStates()); 653 hdr.SetNumArcs(data_->NumArcs()); 654 655 // Ensures compatibility 656 int file_version = opts.align ? kAlignedFileVersion : kFileVersion; 657 WriteHeader(strm, opts, file_version, &hdr); 658 compactor_->Write(strm); 659 return data_->Write(strm, opts); 660 } 661 662 // Provide information needed for generic state iterator 663 void InitStateIterator(StateIteratorData<A> *data) const { 664 data->base = 0; 665 data->nstates = data_->NumStates(); 666 } 667 668 void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 669 if (!HasArcs(s)) 670 Expand(s); 671 CacheImpl<A>::InitArcIterator(s, data); 672 } 673 674 Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { 675 return compactor_->Expand(s, data_->Compacts(i), f); 676 } 677 678 void Expand(StateId s) { 679 size_t begin = compactor_->Size() == -1 ? 680 data_->States(s) : s * compactor_->Size(); 681 size_t end = compactor_->Size() == -1 ? 682 data_->States(s + 1) : (s + 1) * compactor_->Size(); 683 for (size_t i = begin; i < end; ++i) { 684 const Arc &arc = ComputeArc(s, i); 685 if (arc.ilabel == kNoLabel) 686 SetFinal(s, arc.weight); 687 else 688 PushArc(s, arc); 689 } 690 if (!HasFinal(s)) 691 SetFinal(s, Weight::Zero()); 692 SetArcs(s); 693 } 694 695 template <class Iterator> 696 void SetCompactElements(const Iterator &b, const Iterator &e) { 697 if (data_ && !data_->DecrRefCount()) 698 delete data_; 699 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); 700 } 701 702 C *GetCompactor() const { return compactor_; } 703 CompactFstData<CompactElement, U> *Data() const { return data_; } 704 705 // Properties always true of this Fst class 706 static const uint64 kStaticProperties = kExpanded; 707 708 protected: 709 template <class B, class D> 710 explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl) 711 : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), 712 compactor_(new C(*impl.GetCompactor())), 713 own_compactor_(true), 714 data_(impl.Data()) { 715 if (data_) 716 data_->IncrRefCount(); 717 SetType(impl.Type()); 718 SetProperties(impl.Properties()); 719 SetInputSymbols(impl.InputSymbols()); 720 SetOutputSymbols(impl.OutputSymbols()); 721 } 722 723 private: 724 friend class CompactFst<A, C, U>; // allow access during write. 725 726 void Init(const Fst<Arc> &fst) { 727 string type = "compact"; 728 if (sizeof(U) != sizeof(uint32)) { 729 string size; 730 Int64ToStr(8 * sizeof(U), &size); 731 type += size; 732 } 733 type += "_"; 734 type += compactor_->Type(); 735 SetType(type); 736 SetInputSymbols(fst.InputSymbols()); 737 SetOutputSymbols(fst.OutputSymbols()); 738 data_ = new CompactFstData<CompactElement, U>(fst, *compactor_); 739 if (data_->Error()) 740 SetProperties(kError, kError); 741 uint64 copy_properties = fst.Properties(kCopyProperties, true); 742 if ((copy_properties & kError) || !compactor_->Compatible(fst)) { 743 FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; 744 SetProperties(kError, kError); 745 return; 746 } 747 SetProperties(copy_properties | kStaticProperties); 748 } 749 750 template <class Iterator> 751 void Init(const Iterator &b, const Iterator &e) { 752 string type = "compact"; 753 if (sizeof(U) != sizeof(uint32)) { 754 string size; 755 Int64ToStr(8 * sizeof(U), &size); 756 type += size; 757 } 758 type += "_"; 759 type += compactor_->Type(); 760 SetType(type); 761 SetProperties(kStaticProperties | compactor_->Properties()); 762 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); 763 if (data_->Error()) 764 SetProperties(kError, kError); 765 } 766 767 // Current unaligned file format version 768 static const int kFileVersion = 2; 769 // Current aligned file format version 770 static const int kAlignedFileVersion = 1; 771 // Minimum file format version supported 772 static const int kMinFileVersion = 1; 773 774 C *compactor_; 775 bool own_compactor_; 776 CompactFstData<CompactElement, U> *data_; 777 }; 778 779 template <class A, class C, class U> 780 const uint64 CompactFstImpl<A, C, U>::kStaticProperties; 781 template <class A, class C, class U> 782 const int CompactFstImpl<A, C, U>::kFileVersion; 783 template <class A, class C, class U> 784 const int CompactFstImpl<A, C, U>::kAlignedFileVersion; 785 template <class A, class C, class U> 786 const int CompactFstImpl<A, C, U>::kMinFileVersion; 787 788 789 // CompactFst. This class attaches interface to implementation and 790 // handles reference counting, delegating most methods to 791 // ImplToExpandedFst. The unsigned type U is used to represent indices 792 // into the compact arc array (uint32 by default, declared in 793 // fst-decl.h). 794 template <class A, class C, class U> 795 class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > { 796 public: 797 friend class StateIterator< CompactFst<A, C, U> >; 798 friend class ArcIterator< CompactFst<A, C, U> >; 799 template <class F, class G> void friend Cast(const F &, G *); 800 801 typedef A Arc; 802 typedef typename A::StateId StateId; 803 typedef CompactFstImpl<A, C, U> Impl; 804 typedef CacheState<A> State; 805 typedef U Unsigned; 806 807 CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {} 808 809 explicit CompactFst(const Fst<A> &fst, const C &compactor = C(), 810 const CompactFstOptions &opts = CompactFstOptions()) 811 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} 812 813 CompactFst(const Fst<A> &fst, C *compactor, 814 const CompactFstOptions &opts = CompactFstOptions()) 815 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} 816 817 // The following 2 constructors take as input two iterators delimiting 818 // a set of (already) compacted transitions, starting with the 819 // transitions out of the initial state. The format of the input 820 // differs for fixed out-degree and variable out-degree compactors. 821 // 822 // - For fixed out-degree compactors, the final weight (encoded as a 823 // compacted transition) needs to be given only for final 824 // states. All strings (compactor of size 1) will be assume to be 825 // terminated by a final state even when the final state is not 826 // implicitely given. 827 // 828 // - For variable out-degree compactors, the final weight (encoded 829 // as a compacted transition) needs to be given for all states and 830 // must appeared first in the list (for state s, final weight of s, 831 // followed by outgoing transitons in s). 832 // 833 // These 2 constructors allows the direct construction of a CompactFst 834 // without first creating a more memory hungry 'regular' FST. This 835 // is useful when memory usage is severely constrained. 836 template <class Iterator> 837 explicit CompactFst(const Iterator &begin, const Iterator &end, 838 const C &compactor = C(), 839 const CompactFstOptions &opts = CompactFstOptions()) 840 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} 841 842 template <class Iterator> 843 CompactFst(const Iterator &begin, const Iterator &end, 844 C *compactor, const CompactFstOptions &opts = CompactFstOptions()) 845 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} 846 847 // See Fst<>::Copy() for doc. 848 CompactFst(const CompactFst<A, C, U> &fst, bool safe = false) 849 : ImplToExpandedFst<Impl>(fst, safe) {} 850 851 // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. 852 virtual CompactFst<A, C, U> *Copy(bool safe = false) const { 853 return new CompactFst<A, C, U>(*this, safe); 854 } 855 856 // Read a CompactFst from an input stream; return NULL on error 857 static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) { 858 Impl* impl = Impl::Read(strm, opts); 859 return impl ? new CompactFst<A, C, U>(impl) : 0; 860 } 861 862 // Read a CompactFst from a file; return NULL on error 863 // Empty filename reads from standard input 864 static CompactFst<A, C, U> *Read(const string &filename) { 865 Impl* impl = ImplToExpandedFst<Impl>::Read(filename); 866 return impl ? new CompactFst<A, C, U>(impl) : 0; 867 } 868 869 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 870 return GetImpl()->Write(strm, opts); 871 } 872 873 virtual bool Write(const string &filename) const { 874 return Fst<A>::WriteFile(filename); 875 } 876 877 template <class F> 878 static bool WriteFst(const F &fst, const C &compactor, ostream &strm, 879 const FstWriteOptions &opts); 880 881 virtual void InitStateIterator(StateIteratorData<A> *data) const { 882 GetImpl()->InitStateIterator(data); 883 } 884 885 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 886 GetImpl()->InitArcIterator(s, data); 887 } 888 889 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { 890 return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type); 891 } 892 893 template <class Iterator> 894 void SetCompactElements(const Iterator &b, const Iterator &e) { 895 GetImpl()->SetCompactElements(b, e); 896 } 897 898 private: 899 CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} 900 901 // Makes visible to friends. 902 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } 903 904 void SetImpl(Impl *impl, bool own_impl = false) { 905 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); 906 } 907 908 // Use overloading to extract the type of the argument. 909 static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) { 910 return compact_fst.GetImpl(); 911 } 912 913 // This does not give privileged treatment to subclasses of CompactFst. 914 template<typename NonCompactFst> 915 static Impl* GetImplIfCompactFst(const NonCompactFst& fst) { 916 return NULL; 917 } 918 919 void operator=(const CompactFst<A, C, U> &fst); // disallow 920 }; 921 922 // Writes Fst in Compact format, potentially with a pass over the machine 923 // before writing to compute the number of states and arcs. 924 // 925 template <class A, class C, class U> 926 template <class F> 927 bool CompactFst<A, C, U>::WriteFst(const F &fst, 928 const C &compactor, 929 ostream &strm, 930 const FstWriteOptions &opts) { 931 typedef U Unsigned; 932 typedef typename C::Element CompactElement; 933 typedef typename A::Weight Weight; 934 int file_version = opts.align ? 935 CompactFstImpl<A, C, U>::kAlignedFileVersion : 936 CompactFstImpl<A, C, U>::kFileVersion; 937 size_t num_arcs = -1, num_states = -1, num_compacts = -1; 938 C first_pass_compactor = compactor; 939 if (Impl* impl = GetImplIfCompactFst(fst)) { 940 num_arcs = impl->Data()->NumArcs(); 941 num_states = impl->Data()->NumStates(); 942 num_compacts = impl->Data()->NumCompacts(); 943 first_pass_compactor = *impl->GetCompactor(); 944 } else { 945 // A first pass is needed to compute the state of the compactor, which 946 // is saved ahead of the rest of the data structures. This unfortunately 947 // means forcing a complete double compaction when writing in this format. 948 // TODO(allauzen): eliminate mutable state from compactors. 949 num_arcs = 0; 950 num_states = 0; 951 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 952 const StateId s = siter.Value(); 953 ++num_states; 954 if (fst.Final(s) != Weight::Zero()) { 955 first_pass_compactor.Compact( 956 s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); 957 } 958 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { 959 ++num_arcs; 960 first_pass_compactor.Compact(s, aiter.Value()); 961 } 962 } 963 } 964 FstHeader hdr; 965 hdr.SetStart(fst.Start()); 966 hdr.SetNumStates(num_states); 967 hdr.SetNumArcs(num_arcs); 968 string type = "compact"; 969 if (sizeof(U) != sizeof(uint32)) { 970 string size; 971 Int64ToStr(8 * sizeof(U), &size); 972 type += size; 973 } 974 type += "_"; 975 type += C::Type(); 976 uint64 copy_properties = fst.Properties(kCopyProperties, true); 977 if ((copy_properties & kError) || !compactor.Compatible(fst)) { 978 LOG(ERROR) << "fst incompatible with compactor"; 979 return false; 980 } 981 uint64 properties = copy_properties | 982 CompactFstImpl<A, C, U>::kStaticProperties; 983 FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, 984 &hdr); 985 first_pass_compactor.Write(strm); 986 if (first_pass_compactor.Size() == -1) { 987 if (opts.align && !AlignOutput(strm)) { 988 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; 989 return false; 990 } 991 Unsigned compacts = 0; 992 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 993 const StateId s = siter.Value(); 994 strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); 995 if (fst.Final(s) != Weight::Zero()) { 996 ++compacts; 997 } 998 compacts += fst.NumArcs(s); 999 } 1000 strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); 1001 } 1002 if (opts.align && !AlignOutput(strm)) { 1003 LOG(ERROR) << "Could not align file during write after writing states"; 1004 } 1005 C second_pass_compactor = compactor; 1006 CompactElement element; 1007 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 1008 const StateId s = siter.Value(); 1009 if (fst.Final(s) != Weight::Zero()) { 1010 element = second_pass_compactor.Compact( 1011 s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); 1012 strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); 1013 } 1014 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { 1015 element = second_pass_compactor.Compact(s, aiter.Value()); 1016 strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); 1017 } 1018 } 1019 strm.flush(); 1020 if (!strm) { 1021 LOG(ERROR) << "CompactFst write failed: " << opts.source; 1022 return false; 1023 } 1024 return true; 1025 } 1026 1027 1028 // Specialization for CompactFst; see generic version in fst.h 1029 // for sample usage (but use the CompactFst type!). This version 1030 // should inline. 1031 template <class A, class C, class U> 1032 class StateIterator< CompactFst<A, C, U> > { 1033 public: 1034 typedef typename A::StateId StateId; 1035 1036 explicit StateIterator(const CompactFst<A, C, U> &fst) 1037 : nstates_(fst.GetImpl()->NumStates()), s_(0) {} 1038 1039 bool Done() const { return s_ >= nstates_; } 1040 1041 StateId Value() const { return s_; } 1042 1043 void Next() { ++s_; } 1044 1045 void Reset() { s_ = 0; } 1046 1047 private: 1048 StateId nstates_; 1049 StateId s_; 1050 1051 DISALLOW_COPY_AND_ASSIGN(StateIterator); 1052 }; 1053 1054 // Specialization for CompactFst. 1055 // Never caches, always iterates over the underlying compact elements. 1056 template <class A, class C, class U> 1057 class ArcIterator< CompactFst<A, C, U> > { 1058 public: 1059 typedef typename A::StateId StateId; 1060 typedef typename C::Element CompactElement; 1061 1062 ArcIterator(const CompactFst<A, C, U> &fst, StateId s) 1063 : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), 1064 pos_(0), flags_(kArcValueFlags) { 1065 1066 const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data(); 1067 size_t offset; 1068 if (compactor_->Size() == -1) { // Variable out-degree compactor 1069 offset = data->States(s); 1070 num_arcs_ = data->States(s + 1) - offset; 1071 } else { // Fixed out-degree compactor 1072 offset = s * compactor_->Size(); 1073 num_arcs_ = compactor_->Size(); 1074 } 1075 if (num_arcs_ > 0) { 1076 compacts_ = &(data->Compacts(offset)); 1077 arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); 1078 if (arc_.ilabel == kNoStateId) { 1079 ++compacts_; 1080 --num_arcs_; 1081 } 1082 } 1083 } 1084 1085 ~ArcIterator() {} 1086 1087 bool Done() const { return pos_ >= num_arcs_; } 1088 1089 const A& Value() const { 1090 arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); 1091 return arc_; 1092 } 1093 1094 void Next() { ++pos_; } 1095 1096 size_t Position() const { return pos_; } 1097 1098 void Reset() { pos_ = 0; } 1099 1100 void Seek(size_t pos) { pos_ = pos; } 1101 1102 uint32 Flags() const { return flags_; } 1103 1104 void SetFlags(uint32 f, uint32 m) { 1105 flags_ &= ~m; 1106 flags_ |= (f & kArcValueFlags); 1107 } 1108 1109 private: 1110 C *compactor_; 1111 StateId state_; 1112 const CompactElement *compacts_; 1113 size_t pos_; 1114 size_t num_arcs_; 1115 mutable A arc_; 1116 uint32 flags_; 1117 1118 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 1119 }; 1120 1121 // // Specialization for CompactFst. 1122 // // This is an optionally caching arc iterator. 1123 // // TODO(allauzen): implements the kArcValueFlags, the current 1124 // // implementation only implements the kArcNoCache flag. 1125 // template <class A, class C, class U> 1126 // class ArcIterator< CompactFst<A, C, U> > { 1127 // public: 1128 // typedef typename A::StateId StateId; 1129 1130 // ArcIterator(const CompactFst<A, C, U> &fst, StateId s) 1131 // : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), 1132 // flags_(kArcValueFlags) { 1133 // cache_data_.ref_count = 0; 1134 1135 // if (fst_.GetImpl()->HasArcs(state_)) { 1136 // fst_.GetImpl()->InitArcIterator(s, &cache_data_); 1137 // num_arcs_ = cache_data_.narcs; 1138 // return; 1139 // } 1140 1141 // const C *compactor = fst_.GetImpl()->GetCompactor(); 1142 // const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data(); 1143 // if (compactor->Size() == -1) { // Variable out-degree compactor 1144 // offset_ = data->States(s); 1145 // num_arcs_ = data->States(s + 1) - offset_; 1146 // } else { // Fixed out-degree compactor 1147 // offset_ = s * compactor->Size(); 1148 // num_arcs_ = compactor->Size(); 1149 // } 1150 // if (num_arcs_ > 0) { 1151 // const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); 1152 // if (arc.ilabel == kNoStateId) { 1153 // ++offset_; 1154 // --num_arcs_; 1155 // } 1156 // } 1157 // } 1158 1159 1160 // ~ArcIterator() { 1161 // if (cache_data_.ref_count) 1162 // --(*cache_data_.ref_count); 1163 // } 1164 1165 // bool Done() const { return pos_ >= num_arcs_; } 1166 1167 // const A& Value() const { 1168 // if (cache_data_.ref_count == 0) { 1169 // if (flags_ & kArcNoCache) { 1170 // arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); 1171 // return arc_; 1172 // } else { 1173 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_); 1174 // } 1175 // } 1176 // return cache_data_.arcs[pos_]; 1177 // } 1178 1179 // void Next() { ++pos_; } 1180 1181 // size_t Position() const { return pos_; } 1182 1183 // void Reset() { pos_ = 0; } 1184 1185 // void Seek(size_t pos) { pos_ = pos; } 1186 1187 // uint32 Flags() const { return flags_; } 1188 1189 // void SetFlags(uint32 f, uint32 m) { 1190 // flags_ &= ~m; 1191 // flags_ |= f; 1192 1193 // if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) 1194 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_); 1195 // } 1196 1197 // private: 1198 // mutable const CompactFst<A, C, U> &fst_; 1199 // StateId state_; 1200 // size_t pos_; 1201 // size_t num_arcs_; 1202 // size_t offset_; 1203 // uint32 flags_; 1204 // mutable A arc_; 1205 // mutable ArcIteratorData<A> cache_data_; 1206 1207 // DISALLOW_COPY_AND_ASSIGN(ArcIterator); 1208 // }; 1209 1210 1211 // 1212 // Utility Compactors 1213 // 1214 1215 // Compactor for unweighted string FSTs 1216 template <class A> 1217 class StringCompactor { 1218 public: 1219 typedef A Arc; 1220 typedef typename A::Label Element; 1221 typedef typename A::Label Label; 1222 typedef typename A::StateId StateId; 1223 typedef typename A::Weight Weight; 1224 1225 Element Compact(StateId s, const A &arc) const { return arc.ilabel; } 1226 1227 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1228 return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); 1229 } 1230 1231 ssize_t Size() const { return 1; } 1232 1233 uint64 Properties() const { 1234 return kString | kAcceptor | kUnweighted; 1235 } 1236 1237 bool Compatible(const Fst<A> &fst) const { 1238 uint64 props = Properties(); 1239 return fst.Properties(props, true) == props; 1240 } 1241 1242 static const string &Type() { 1243 static const string type = "string"; 1244 return type; 1245 } 1246 1247 bool Write(ostream &strm) const { return true; } 1248 1249 static StringCompactor *Read(istream &strm) { 1250 return new StringCompactor; 1251 } 1252 }; 1253 1254 1255 // Compactor for weighted string FSTs 1256 template <class A> 1257 class WeightedStringCompactor { 1258 public: 1259 typedef A Arc; 1260 typedef typename A::Label Label; 1261 typedef typename A::StateId StateId; 1262 typedef typename A::Weight Weight; 1263 typedef pair<Label, Weight> Element; 1264 1265 Element Compact(StateId s, const A &arc) const { 1266 return make_pair(arc.ilabel, arc.weight); 1267 } 1268 1269 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1270 return Arc(p.first, p.first, p.second, 1271 p.first != kNoLabel ? s + 1 : kNoStateId); 1272 } 1273 1274 ssize_t Size() const { return 1;} 1275 1276 uint64 Properties() const { 1277 return kString | kAcceptor; 1278 } 1279 1280 bool Compatible(const Fst<A> &fst) const { 1281 uint64 props = Properties(); 1282 return fst.Properties(props, true) == props; 1283 } 1284 1285 static const string &Type() { 1286 static const string type = "weighted_string"; 1287 return type; 1288 } 1289 1290 bool Write(ostream &strm) const { return true; } 1291 1292 static WeightedStringCompactor *Read(istream &strm) { 1293 return new WeightedStringCompactor; 1294 } 1295 }; 1296 1297 1298 // Compactor for unweighted acceptor FSTs 1299 template <class A> 1300 class UnweightedAcceptorCompactor { 1301 public: 1302 typedef A Arc; 1303 typedef typename A::Label Label; 1304 typedef typename A::StateId StateId; 1305 typedef typename A::Weight Weight; 1306 typedef pair<Label, StateId> Element; 1307 1308 Element Compact(StateId s, const A &arc) const { 1309 return make_pair(arc.ilabel, arc.nextstate); 1310 } 1311 1312 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1313 return Arc(p.first, p.first, Weight::One(), p.second); 1314 } 1315 1316 ssize_t Size() const { return -1;} 1317 1318 uint64 Properties() const { 1319 return kAcceptor | kUnweighted; 1320 } 1321 1322 bool Compatible(const Fst<A> &fst) const { 1323 uint64 props = Properties(); 1324 return fst.Properties(props, true) == props; 1325 } 1326 1327 static const string &Type() { 1328 static const string type = "unweighted_acceptor"; 1329 return type; 1330 } 1331 1332 bool Write(ostream &strm) const { return true; } 1333 1334 static UnweightedAcceptorCompactor *Read(istream &istrm) { 1335 return new UnweightedAcceptorCompactor; 1336 } 1337 }; 1338 1339 1340 // Compactor for weighted acceptor FSTs 1341 template <class A> 1342 class AcceptorCompactor { 1343 public: 1344 typedef A Arc; 1345 typedef typename A::Label Label; 1346 typedef typename A::StateId StateId; 1347 typedef typename A::Weight Weight; 1348 typedef pair< pair<Label, Weight>, StateId > Element; 1349 1350 Element Compact(StateId s, const A &arc) const { 1351 return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); 1352 } 1353 1354 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1355 return Arc(p.first.first, p.first.first, p.first.second, p.second); 1356 } 1357 1358 ssize_t Size() const { return -1;} 1359 1360 uint64 Properties() const { 1361 return kAcceptor; 1362 } 1363 1364 bool Compatible(const Fst<A> &fst) const { 1365 uint64 props = Properties(); 1366 return fst.Properties(props, true) == props; 1367 } 1368 1369 static const string &Type() { 1370 static const string type = "acceptor"; 1371 return type; 1372 } 1373 1374 bool Write(ostream &strm) const { return true; } 1375 1376 static AcceptorCompactor *Read(istream &strm) { 1377 return new AcceptorCompactor; 1378 } 1379 }; 1380 1381 1382 // Compactor for unweighted FSTs 1383 template <class A> 1384 class UnweightedCompactor { 1385 public: 1386 typedef A Arc; 1387 typedef typename A::Label Label; 1388 typedef typename A::StateId StateId; 1389 typedef typename A::Weight Weight; 1390 typedef pair< pair<Label, Label>, StateId > Element; 1391 1392 Element Compact(StateId s, const A &arc) const { 1393 return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); 1394 } 1395 1396 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { 1397 return Arc(p.first.first, p.first.second, Weight::One(), p.second); 1398 } 1399 1400 ssize_t Size() const { return -1; } 1401 1402 uint64 Properties() const { 1403 return kUnweighted; 1404 } 1405 1406 bool Compatible(const Fst<A> &fst) const { 1407 uint64 props = Properties(); 1408 return fst.Properties(props, true) == props; 1409 } 1410 1411 static const string &Type() { 1412 static const string type = "unweighted"; 1413 return type; 1414 } 1415 1416 bool Write(ostream &strm) const { return true; } 1417 1418 static UnweightedCompactor *Read(istream &strm) { 1419 return new UnweightedCompactor; 1420 } 1421 }; 1422 1423 1424 // Uselful aliases when using StdArc 1425 typedef CompactFst< StdArc, StringCompactor<StdArc> > 1426 StdCompactStringFst; 1427 typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > 1428 StdCompactWeightedStringFst; 1429 typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > 1430 StdCompactAcceptorFst; 1431 typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > 1432 StdCompactUnweightedFst; 1433 typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > 1434 StdCompactUnweightedAcceptorFst; 1435 1436 } // namespace fst 1437 1438 #endif // FST_LIB_COMPACT_FST_H__ 1439