1 // fst.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Finite-State Transducer (FST) - abstract base class definition, 20 // state and arc iterator interface, and suggested base implementation. 21 // 22 23 #ifndef FST_LIB_FST_H__ 24 #define FST_LIB_FST_H__ 25 26 #include <stddef.h> 27 #include <sys/types.h> 28 #include <cmath> 29 #include <string> 30 31 #include <fst/compat.h> 32 #include <fst/types.h> 33 34 #include <fst/arc.h> 35 #include <fst/properties.h> 36 #include <fst/register.h> 37 #include <iostream> 38 #include <fstream> 39 #include <sstream> 40 #include <fst/symbol-table.h> 41 #include <fst/util.h> 42 43 44 DECLARE_bool(fst_align); 45 46 namespace fst { 47 48 bool IsFstHeader(istream &, const string &); 49 50 class FstHeader; 51 template <class A> class StateIteratorData; 52 template <class A> class ArcIteratorData; 53 template <class A> class MatcherBase; 54 55 struct FstReadOptions { 56 // FileReadMode(s) are advisory, there are many conditions than prevent a 57 // file from being mapped, READ mode will be selected in these cases with 58 // a warning indicating why it was chosen. 59 enum FileReadMode { READ, MAP }; 60 61 string source; // Where you're reading from 62 const FstHeader *header; // Pointer to Fst header. If non-zero, use 63 // this info (don't read a stream header) 64 const SymbolTable* isymbols; // Pointer to input symbols. If non-zero, use 65 // this info (read and skip stream isymbols) 66 const SymbolTable* osymbols; // Pointer to output symbols. If non-zero, use 67 // this info (read and skip stream osymbols) 68 FileReadMode mode; // Read or map files (advisory, if possible) 69 70 explicit FstReadOptions(const string& src = "<unspecified>", 71 const FstHeader *hdr = 0, 72 const SymbolTable* isym = 0, 73 const SymbolTable* osym = 0); 74 75 explicit FstReadOptions(const string& src, 76 const SymbolTable* isym, 77 const SymbolTable* osym = 0); 78 79 // Helper function to convert strings FileReadModes into their enum value. 80 static FileReadMode ReadMode(const string &mode); 81 }; 82 83 struct FstWriteOptions { 84 string source; // Where you're writing to 85 bool write_header; // Write the header? 86 bool write_isymbols; // Write input symbols? 87 bool write_osymbols; // Write output symbols? 88 bool align; // Write data aligned where appropriate; 89 // this may fail on pipes 90 91 explicit FstWriteOptions(const string& src = "<unspecifed>", 92 bool hdr = true, bool isym = true, 93 bool osym = true, bool alig = FLAGS_fst_align) 94 : source(src), write_header(hdr), 95 write_isymbols(isym), write_osymbols(osym), align(alig) {} 96 }; 97 98 // 99 // Fst HEADER CLASS 100 // 101 // This is the recommended Fst file header representation. 102 // 103 class FstHeader { 104 public: 105 enum { 106 HAS_ISYMBOLS = 0x1, // Has input symbol table 107 HAS_OSYMBOLS = 0x2, // Has output symbol table 108 IS_ALIGNED = 0x4, // Memory-aligned (where appropriate) 109 } Flags; 110 111 FstHeader() : version_(0), flags_(0), properties_(0), start_(-1), 112 numstates_(0), numarcs_(0) {} 113 const string &FstType() const { return fsttype_; } 114 const string &ArcType() const { return arctype_; } 115 int32 Version() const { return version_; } 116 int32 GetFlags() const { return flags_; } 117 uint64 Properties() const { return properties_; } 118 int64 Start() const { return start_; } 119 int64 NumStates() const { return numstates_; } 120 int64 NumArcs() const { return numarcs_; } 121 122 void SetFstType(const string& type) { fsttype_ = type; } 123 void SetArcType(const string& type) { arctype_ = type; } 124 void SetVersion(int32 version) { version_ = version; } 125 void SetFlags(int32 flags) { flags_ = flags; } 126 void SetProperties(uint64 properties) { properties_ = properties; } 127 void SetStart(int64 start) { start_ = start; } 128 void SetNumStates(int64 numstates) { numstates_ = numstates; } 129 void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; } 130 131 bool Read(istream &strm, const string &source, bool rewind = false); 132 bool Write(ostream &strm, const string &source) const; 133 134 private: 135 136 string fsttype_; // E.g. "vector" 137 string arctype_; // E.g. "standard" 138 int32 version_; // Type version # 139 int32 flags_; // File format bits 140 uint64 properties_; // FST property bits 141 int64 start_; // Start state 142 int64 numstates_; // # of states 143 int64 numarcs_; // # of arcs 144 }; 145 146 147 // Specifies matcher action. 148 enum MatchType { MATCH_INPUT, // Match input label. 149 MATCH_OUTPUT, // Match output label. 150 MATCH_BOTH, // Match input or output label. 151 MATCH_NONE, // Match nothing. 152 MATCH_UNKNOWN }; // Match type unknown. 153 154 // 155 // Fst INTERFACE CLASS DEFINITION 156 // 157 158 // A generic FST, templated on the arc definition, with 159 // common-demoninator methods (use StateIterator and ArcIterator to 160 // iterate over its states and arcs). 161 template <class A> 162 class Fst { 163 public: 164 typedef A Arc; 165 typedef typename A::Weight Weight; 166 typedef typename A::StateId StateId; 167 168 virtual ~Fst() {} 169 170 virtual StateId Start() const = 0; // Initial state 171 172 virtual Weight Final(StateId) const = 0; // State's final weight 173 174 virtual size_t NumArcs(StateId) const = 0; // State's arc count 175 176 virtual size_t NumInputEpsilons(StateId) 177 const = 0; // State's input epsilon count 178 179 virtual size_t NumOutputEpsilons(StateId) 180 const = 0; // State's output epsilon count 181 182 // If test=false, return stored properties bits for mask (some poss. unknown) 183 // If test=true, return property bits for mask (computing o.w. unknown) 184 virtual uint64 Properties(uint64 mask, bool test) 185 const = 0; // Property bits 186 187 virtual const string& Type() const = 0; // Fst type name 188 189 // Get a copy of this Fst. The copying behaves as follows: 190 // 191 // (1) The copying is constant time if safe = false or if safe = true 192 // and is on an otherwise unaccessed Fst. 193 // 194 // (2) If safe = true, the copy is thread-safe in that the original 195 // and copy can be safely accessed (but not necessarily mutated) by 196 // separate threads. For some Fst types, 'Copy(true)' should only be 197 // called on an Fst that has not otherwise been accessed. Its behavior 198 // is undefined otherwise. 199 // 200 // (3) If a MutableFst is copied and then mutated, then the original is 201 // unmodified and vice versa (often by a copy-on-write on the initial 202 // mutation, which may not be constant time). 203 virtual Fst<A> *Copy(bool safe = false) const = 0; 204 205 // Read an Fst from an input stream; returns NULL on error 206 static Fst<A> *Read(istream &strm, const FstReadOptions &opts) { 207 FstReadOptions ropts(opts); 208 FstHeader hdr; 209 if (ropts.header) 210 hdr = *opts.header; 211 else { 212 if (!hdr.Read(strm, opts.source)) 213 return 0; 214 ropts.header = &hdr; 215 } 216 FstRegister<A> *registr = FstRegister<A>::GetRegister(); 217 const typename FstRegister<A>::Reader reader = 218 registr->GetReader(hdr.FstType()); 219 if (!reader) { 220 LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType() 221 << "\" (arc type = \"" << A::Type() 222 << "\"): " << ropts.source; 223 return 0; 224 } 225 return reader(strm, ropts); 226 }; 227 228 // Read an Fst from a file; return NULL on error 229 // Empty filename reads from standard input 230 static Fst<A> *Read(const string &filename) { 231 if (!filename.empty()) { 232 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); 233 if (!strm) { 234 LOG(ERROR) << "Fst::Read: Can't open file: " << filename; 235 return 0; 236 } 237 return Read(strm, FstReadOptions(filename)); 238 } else { 239 return Read(cin, FstReadOptions("standard input")); 240 } 241 } 242 243 // Write an Fst to an output stream; return false on error 244 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 245 LOG(ERROR) << "Fst::Write: No write stream method for " << Type() 246 << " Fst type"; 247 return false; 248 } 249 250 // Write an Fst to a file; return false on error 251 // Empty filename writes to standard output 252 virtual bool Write(const string &filename) const { 253 LOG(ERROR) << "Fst::Write: No write filename method for " << Type() 254 << " Fst type"; 255 return false; 256 } 257 258 // Return input label symbol table; return NULL if not specified 259 virtual const SymbolTable* InputSymbols() const = 0; 260 261 // Return output label symbol table; return NULL if not specified 262 virtual const SymbolTable* OutputSymbols() const = 0; 263 264 // For generic state iterator construction; not normally called 265 // directly by users. 266 virtual void InitStateIterator(StateIteratorData<A> *) const = 0; 267 268 // For generic arc iterator construction; not normally called 269 // directly by users. 270 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0; 271 272 // For generic matcher construction; not normally called 273 // directly by users. 274 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const; 275 276 protected: 277 bool WriteFile(const string &filename) const { 278 if (!filename.empty()) { 279 ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); 280 if (!strm) { 281 LOG(ERROR) << "Fst::Write: Can't open file: " << filename; 282 return false; 283 } 284 return Write(strm, FstWriteOptions(filename)); 285 } else { 286 return Write(cout, FstWriteOptions("standard output")); 287 } 288 } 289 }; 290 291 292 // 293 // STATE and ARC ITERATOR DEFINITIONS 294 // 295 296 // State iterator interface templated on the Arc definition; used 297 // for StateIterator specializations returned by the InitStateIterator 298 // Fst method. 299 template <class A> 300 class StateIteratorBase { 301 public: 302 typedef A Arc; 303 typedef typename A::StateId StateId; 304 305 virtual ~StateIteratorBase() {} 306 307 bool Done() const { return Done_(); } // End of iterator? 308 StateId Value() const { return Value_(); } // Current state (when !Done) 309 void Next() { Next_(); } // Advance to next state (when !Done) 310 void Reset() { Reset_(); } // Return to initial condition 311 312 private: 313 // This allows base class virtual access to non-virtual derived- 314 // class members of the same name. It makes the derived class more 315 // efficient to use but unsafe to further derive. 316 virtual bool Done_() const = 0; 317 virtual StateId Value_() const = 0; 318 virtual void Next_() = 0; 319 virtual void Reset_() = 0; 320 }; 321 322 323 // StateIterator initialization data 324 325 template <class A> struct StateIteratorData { 326 StateIteratorBase<A> *base; // Specialized iterator if non-zero 327 typename A::StateId nstates; // O.w. total # of states 328 }; 329 330 331 // Generic state iterator, templated on the FST definition 332 // - a wrapper around pointer to specific one. 333 // Here is a typical use: \code 334 // for (StateIterator<StdFst> siter(fst); 335 // !siter.Done(); 336 // siter.Next()) { 337 // StateId s = siter.Value(); 338 // ... 339 // } \endcode 340 template <class F> 341 class StateIterator { 342 public: 343 typedef F FST; 344 typedef typename F::Arc Arc; 345 typedef typename Arc::StateId StateId; 346 347 explicit StateIterator(const F &fst) : s_(0) { 348 fst.InitStateIterator(&data_); 349 } 350 351 ~StateIterator() { if (data_.base) delete data_.base; } 352 353 bool Done() const { 354 return data_.base ? data_.base->Done() : s_ >= data_.nstates; 355 } 356 357 StateId Value() const { return data_.base ? data_.base->Value() : s_; } 358 359 void Next() { 360 if (data_.base) 361 data_.base->Next(); 362 else 363 ++s_; 364 } 365 366 void Reset() { 367 if (data_.base) 368 data_.base->Reset(); 369 else 370 s_ = 0; 371 } 372 373 private: 374 StateIteratorData<Arc> data_; 375 StateId s_; 376 377 DISALLOW_COPY_AND_ASSIGN(StateIterator); 378 }; 379 380 381 // Flags to control the behavior on an arc iterator: 382 static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel 383 static const uint32 kArcOLabelValue = 0x0002; // " " " olabel 384 static const uint32 kArcWeightValue = 0x0004; // " " " weight 385 static const uint32 kArcNextStateValue = 0x0008; // " " " nextstate 386 static const uint32 kArcNoCache = 0x0010; // No need to cache arcs 387 388 static const uint32 kArcValueFlags = 389 kArcILabelValue | kArcOLabelValue | 390 kArcWeightValue | kArcNextStateValue; 391 392 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache; 393 394 395 // Arc iterator interface, templated on the Arc definition; used 396 // for Arc iterator specializations that are returned by the InitArcIterator 397 // Fst method. 398 template <class A> 399 class ArcIteratorBase { 400 public: 401 typedef A Arc; 402 typedef typename A::StateId StateId; 403 404 virtual ~ArcIteratorBase() {} 405 406 bool Done() const { return Done_(); } // End of iterator? 407 const A& Value() const { return Value_(); } // Current arc (when !Done) 408 void Next() { Next_(); } // Advance to next arc (when !Done) 409 size_t Position() const { return Position_(); } // Return current position 410 void Reset() { Reset_(); } // Return to initial condition 411 void Seek(size_t a) { Seek_(a); } // Random arc access by position 412 uint32 Flags() const { return Flags_(); } // Return current behavorial flags 413 void SetFlags(uint32 flags, uint32 mask) { // Set behavorial flags 414 SetFlags_(flags, mask); 415 } 416 417 private: 418 // This allows base class virtual access to non-virtual derived- 419 // class members of the same name. It makes the derived class more 420 // efficient to use but unsafe to further derive. 421 virtual bool Done_() const = 0; 422 virtual const A& Value_() const = 0; 423 virtual void Next_() = 0; 424 virtual size_t Position_() const = 0; 425 virtual void Reset_() = 0; 426 virtual void Seek_(size_t a) = 0; 427 virtual uint32 Flags_() const = 0; 428 virtual void SetFlags_(uint32 flags, uint32 mask) = 0; 429 }; 430 431 432 // ArcIterator initialization data 433 template <class A> struct ArcIteratorData { 434 ArcIteratorBase<A> *base; // Specialized iterator if non-zero 435 const A *arcs; // O.w. arcs pointer 436 size_t narcs; // ... and arc count 437 int *ref_count; // ... and reference count if non-zero 438 }; 439 440 441 // Generic arc iterator, templated on the FST definition 442 // - a wrapper around pointer to specific one. 443 // Here is a typical use: \code 444 // for (ArcIterator<StdFst> aiter(fst, s)); 445 // !aiter.Done(); 446 // aiter.Next()) { 447 // StdArc &arc = aiter.Value(); 448 // ... 449 // } \endcode 450 template <class F> 451 class ArcIterator { 452 public: 453 typedef F FST; 454 typedef typename F::Arc Arc; 455 typedef typename Arc::StateId StateId; 456 457 ArcIterator(const F &fst, StateId s) : i_(0) { 458 fst.InitArcIterator(s, &data_); 459 } 460 461 explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) { 462 if (data_.ref_count) 463 ++(*data_.ref_count); 464 } 465 466 ~ArcIterator() { 467 if (data_.base) 468 delete data_.base; 469 else if (data_.ref_count) 470 --(*data_.ref_count); 471 } 472 473 bool Done() const { 474 return data_.base ? data_.base->Done() : i_ >= data_.narcs; 475 } 476 477 const Arc& Value() const { 478 return data_.base ? data_.base->Value() : data_.arcs[i_]; 479 } 480 481 void Next() { 482 if (data_.base) 483 data_.base->Next(); 484 else 485 ++i_; 486 } 487 488 void Reset() { 489 if (data_.base) 490 data_.base->Reset(); 491 else 492 i_ = 0; 493 } 494 495 void Seek(size_t a) { 496 if (data_.base) 497 data_.base->Seek(a); 498 else 499 i_ = a; 500 } 501 502 size_t Position() const { 503 return data_.base ? data_.base->Position() : i_; 504 } 505 506 uint32 Flags() const { 507 if (data_.base) 508 return data_.base->Flags(); 509 else 510 return kArcValueFlags; 511 } 512 513 void SetFlags(uint32 flags, uint32 mask) { 514 if (data_.base) 515 data_.base->SetFlags(flags, mask); 516 } 517 518 private: 519 ArcIteratorData<Arc> data_; 520 size_t i_; 521 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 522 }; 523 524 // 525 // MATCHER DEFINITIONS 526 // 527 528 template <class A> 529 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const { 530 return 0; // Use the default matcher 531 } 532 533 534 // 535 // FST ACCESSORS - Useful functions in high-performance cases. 536 // 537 538 namespace internal { 539 540 // General case - requires non-abstract, 'final' methods. Use for inlining. 541 template <class F> inline 542 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) { 543 return fst.F::Final(s); 544 } 545 546 template <class F> inline 547 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) { 548 return fst.F::NumArcs(s); 549 } 550 551 template <class F> inline 552 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) { 553 return fst.F::NumInputEpsilons(s); 554 } 555 556 template <class F> inline 557 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) { 558 return fst.F::NumOutputEpsilons(s); 559 } 560 561 562 // Fst<A> case - abstract methods. 563 template <class A> inline 564 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) { 565 return fst.Final(s); 566 } 567 568 template <class A> inline 569 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) { 570 return fst.NumArcs(s); 571 } 572 573 template <class A> inline 574 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) { 575 return fst.NumInputEpsilons(s); 576 } 577 578 template <class A> inline 579 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) { 580 return fst.NumOutputEpsilons(s); 581 } 582 583 } // namespace internal 584 585 // A useful alias when using StdArc. 586 typedef Fst<StdArc> StdFst; 587 588 589 // 590 // CONSTANT DEFINITIONS 591 // 592 593 const int kNoStateId = -1; // Not a valid state ID 594 const int kNoLabel = -1; // Not a valid label 595 596 // 597 // Fst IMPLEMENTATION BASE 598 // 599 // This is the recommended Fst implementation base class. It will 600 // handle reference counts, property bits, type information and symbols. 601 // 602 603 template <class A> class FstImpl { 604 public: 605 typedef typename A::Weight Weight; 606 typedef typename A::StateId StateId; 607 608 FstImpl() 609 : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {} 610 611 FstImpl(const FstImpl<A> &impl) 612 : properties_(impl.properties_), type_(impl.type_), 613 isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0), 614 osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {} 615 616 virtual ~FstImpl() { 617 delete isymbols_; 618 delete osymbols_; 619 } 620 621 const string& Type() const { return type_; } 622 623 void SetType(const string &type) { type_ = type; } 624 625 virtual uint64 Properties() const { return properties_; } 626 627 virtual uint64 Properties(uint64 mask) const { return properties_ & mask; } 628 629 void SetProperties(uint64 props) { 630 properties_ &= kError; // kError can't be cleared 631 properties_ |= props; 632 } 633 634 void SetProperties(uint64 props, uint64 mask) { 635 properties_ &= ~mask | kError; // kError can't be cleared 636 properties_ |= props & mask; 637 } 638 639 // Allows (only) setting error bit on const FST impls 640 void SetProperties(uint64 props, uint64 mask) const { 641 if (mask != kError) 642 FSTERROR() << "FstImpl::SetProperties() const: can only set kError"; 643 properties_ |= kError; 644 } 645 646 const SymbolTable* InputSymbols() const { return isymbols_; } 647 648 const SymbolTable* OutputSymbols() const { return osymbols_; } 649 650 SymbolTable* InputSymbols() { return isymbols_; } 651 652 SymbolTable* OutputSymbols() { return osymbols_; } 653 654 void SetInputSymbols(const SymbolTable* isyms) { 655 if (isymbols_) delete isymbols_; 656 isymbols_ = isyms ? isyms->Copy() : 0; 657 } 658 659 void SetOutputSymbols(const SymbolTable* osyms) { 660 if (osymbols_) delete osymbols_; 661 osymbols_ = osyms ? osyms->Copy() : 0; 662 } 663 664 int RefCount() const { 665 return ref_count_.count(); 666 } 667 668 int IncrRefCount() { 669 return ref_count_.Incr(); 670 } 671 672 int DecrRefCount() { 673 return ref_count_.Decr(); 674 } 675 676 // Read-in header and symbols from input stream, initialize Fst, and 677 // return the header. If opts.header is non-null, skip read-in and 678 // use the option value. If opts.[io]symbols is non-null, read-in 679 // (if present), but use the option value. 680 bool ReadHeader(istream &strm, const FstReadOptions& opts, 681 int min_version, FstHeader *hdr); 682 683 // Write-out header and symbols from output stream. 684 // If a opts.header is false, skip writing header. 685 // If opts.[io]symbols is false, skip writing those symbols. 686 // This method is needed for Impl's that implement Write methods. 687 void WriteHeader(ostream &strm, const FstWriteOptions& opts, 688 int version, FstHeader *hdr) const { 689 if (opts.write_header) { 690 hdr->SetFstType(type_); 691 hdr->SetArcType(A::Type()); 692 hdr->SetVersion(version); 693 hdr->SetProperties(properties_); 694 int32 file_flags = 0; 695 if (isymbols_ && opts.write_isymbols) 696 file_flags |= FstHeader::HAS_ISYMBOLS; 697 if (osymbols_ && opts.write_osymbols) 698 file_flags |= FstHeader::HAS_OSYMBOLS; 699 if (opts.align) 700 file_flags |= FstHeader::IS_ALIGNED; 701 hdr->SetFlags(file_flags); 702 hdr->Write(strm, opts.source); 703 } 704 if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm); 705 if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm); 706 } 707 708 // Write-out header and symbols to output stream. 709 // If a opts.header is false, skip writing header. 710 // If opts.[io]symbols is false, skip writing those symbols. 711 // type is the Fst type being written. 712 // This method is used in the cross-type serialization methods Fst::WriteFst. 713 static void WriteFstHeader(const Fst<A> &fst, ostream &strm, 714 const FstWriteOptions& opts, int version, 715 const string &type, uint64 properties, 716 FstHeader *hdr) { 717 if (opts.write_header) { 718 hdr->SetFstType(type); 719 hdr->SetArcType(A::Type()); 720 hdr->SetVersion(version); 721 hdr->SetProperties(properties); 722 int32 file_flags = 0; 723 if (fst.InputSymbols() && opts.write_isymbols) 724 file_flags |= FstHeader::HAS_ISYMBOLS; 725 if (fst.OutputSymbols() && opts.write_osymbols) 726 file_flags |= FstHeader::HAS_OSYMBOLS; 727 if (opts.align) 728 file_flags |= FstHeader::IS_ALIGNED; 729 hdr->SetFlags(file_flags); 730 hdr->Write(strm, opts.source); 731 } 732 if (fst.InputSymbols() && opts.write_isymbols) { 733 fst.InputSymbols()->Write(strm); 734 } 735 if (fst.OutputSymbols() && opts.write_osymbols) { 736 fst.OutputSymbols()->Write(strm); 737 } 738 } 739 740 // In serialization routines where the header cannot be written until after 741 // the machine has been serialized, this routine can be called to seek to 742 // the beginning of the file an rewrite the header with updated fields. 743 // It repositions the file pointer back at the end of the file. 744 // returns true on success, false on failure. 745 static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm, 746 const FstWriteOptions& opts, int version, 747 const string &type, uint64 properties, 748 FstHeader *hdr, size_t header_offset) { 749 strm.seekp(header_offset); 750 if (!strm) { 751 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 752 return false; 753 } 754 WriteFstHeader(fst, strm, opts, version, type, properties, hdr); 755 if (!strm) { 756 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 757 return false; 758 } 759 strm.seekp(0, ios_base::end); 760 if (!strm) { 761 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 762 return false; 763 } 764 return true; 765 } 766 767 protected: 768 mutable uint64 properties_; // Property bits 769 770 private: 771 string type_; // Unique name of Fst class 772 SymbolTable *isymbols_; // Ilabel symbol table 773 SymbolTable *osymbols_; // Olabel symbol table 774 RefCounter ref_count_; // Reference count 775 776 void operator=(const FstImpl<A> &impl); // disallow 777 }; 778 779 template <class A> inline 780 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts, 781 int min_version, FstHeader *hdr) { 782 if (opts.header) 783 *hdr = *opts.header; 784 else if (!hdr->Read(strm, opts.source)) 785 return false; 786 787 if (FLAGS_v >= 2) { 788 LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source 789 << ", fst_type: " << hdr->FstType() 790 << ", arc_type: " << A::Type() 791 << ", version: " << hdr->Version() 792 << ", flags: " << hdr->GetFlags(); 793 } 794 795 if (hdr->FstType() != type_) { 796 LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_ 797 << "\": " << opts.source; 798 return false; 799 } 800 if (hdr->ArcType() != A::Type()) { 801 LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type() 802 << "\": " << opts.source; 803 return false; 804 } 805 if (hdr->Version() < min_version) { 806 LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_ 807 << " Fst version: " << opts.source; 808 return false; 809 } 810 properties_ = hdr->Properties(); 811 if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) 812 isymbols_ = SymbolTable::Read(strm, opts.source); 813 if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) 814 osymbols_ =SymbolTable::Read(strm, opts.source); 815 816 if (opts.isymbols) { 817 delete isymbols_; 818 isymbols_ = opts.isymbols->Copy(); 819 } 820 if (opts.osymbols) { 821 delete osymbols_; 822 osymbols_ = opts.osymbols->Copy(); 823 } 824 return true; 825 } 826 827 828 template<class Arc> 829 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known); 830 831 832 // This is a helper class template useful for attaching an Fst interface to 833 // its implementation, handling reference counting. 834 template < class I, class F = Fst<typename I::Arc> > 835 class ImplToFst : public F { 836 public: 837 typedef typename I::Arc Arc; 838 typedef typename Arc::Weight Weight; 839 typedef typename Arc::StateId StateId; 840 841 virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; } 842 843 virtual StateId Start() const { return impl_->Start(); } 844 845 virtual Weight Final(StateId s) const { return impl_->Final(s); } 846 847 virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } 848 849 virtual size_t NumInputEpsilons(StateId s) const { 850 return impl_->NumInputEpsilons(s); 851 } 852 853 virtual size_t NumOutputEpsilons(StateId s) const { 854 return impl_->NumOutputEpsilons(s); 855 } 856 857 virtual uint64 Properties(uint64 mask, bool test) const { 858 if (test) { 859 uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops); 860 impl_->SetProperties(testprops, knownprops); 861 return testprops & mask; 862 } else { 863 return impl_->Properties(mask); 864 } 865 } 866 867 virtual const string& Type() const { return impl_->Type(); } 868 869 virtual const SymbolTable* InputSymbols() const { 870 return impl_->InputSymbols(); 871 } 872 873 virtual const SymbolTable* OutputSymbols() const { 874 return impl_->OutputSymbols(); 875 } 876 877 protected: 878 ImplToFst() : impl_(0) {} 879 880 ImplToFst(I *impl) : impl_(impl) {} 881 882 ImplToFst(const ImplToFst<I, F> &fst) { 883 impl_ = fst.impl_; 884 impl_->IncrRefCount(); 885 } 886 887 // This constructor presumes there is a copy constructor for the 888 // implementation. 889 ImplToFst(const ImplToFst<I, F> &fst, bool safe) { 890 if (safe) { 891 impl_ = new I(*(fst.impl_)); 892 } else { 893 impl_ = fst.impl_; 894 impl_->IncrRefCount(); 895 } 896 } 897 898 I *GetImpl() const { return impl_; } 899 900 // Change Fst implementation pointer. If 'own_impl' is true, 901 // ownership of the input implementation is given to this 902 // object; otherwise, the input implementation's reference count 903 // should be incremented. 904 void SetImpl(I *impl, bool own_impl = true) { 905 if (!own_impl) 906 impl->IncrRefCount(); 907 if (impl_ && !impl_->DecrRefCount()) delete impl_; 908 impl_ = impl; 909 } 910 911 private: 912 // Disallow 913 ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst); 914 915 ImplToFst<I, F> &operator=(const Fst<Arc> &fst) { 916 FSTERROR() << "ImplToFst: Assignment operator disallowed"; 917 GetImpl()->SetProperties(kError, kError); 918 return *this; 919 } 920 921 I *impl_; 922 }; 923 924 925 // Converts FSTs by casting their implementations, where this makes 926 // sense (which excludes implementations with weight-dependent virtual 927 // methods). Must be a friend of the Fst classes involved (currently 928 // the concrete Fsts: VectorFst, ConstFst, CompactFst). 929 template<class F, class G> void Cast(const F &ifst, G *ofst) { 930 ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false); 931 } 932 933 // Fst Serialization 934 template <class A> 935 void FstToString(const Fst<A> &fst, string *result) { 936 ostringstream ostrm; 937 fst.Write(ostrm, FstWriteOptions("FstToString")); 938 *result = ostrm.str(); 939 } 940 941 template <class A> 942 Fst<A> *StringToFst(const string &s) { 943 istringstream istrm(s); 944 return Fst<A>::Read(istrm, FstReadOptions("StringToFst")); 945 } 946 947 } // namespace fst 948 949 #endif // FST_LIB_FST_H__ 950