Home | History | Annotate | Download | only in fst
      1 // fst.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Finite-State Transducer (FST) - abstract base class definition,
     20 // state and arc iterator interface, and suggested base implementation.
     21 //
     22 
     23 #ifndef FST_LIB_FST_H__
     24 #define FST_LIB_FST_H__
     25 
     26 #include <stddef.h>
     27 #include <sys/types.h>
     28 #include <cmath>
     29 #include <string>
     30 
     31 #include <fst/compat.h>
     32 #include <fst/types.h>
     33 
     34 #include <fst/arc.h>
     35 #include <fst/properties.h>
     36 #include <fst/register.h>
     37 #include <iostream>
     38 #include <fstream>
     39 #include <sstream>
     40 #include <fst/symbol-table.h>
     41 #include <fst/util.h>
     42 
     43 
     44 DECLARE_bool(fst_align);
     45 
     46 namespace fst {
     47 
     48 bool IsFstHeader(istream &, const string &);
     49 
     50 class FstHeader;
     51 template <class A> class StateIteratorData;
     52 template <class A> class ArcIteratorData;
     53 template <class A> class MatcherBase;
     54 
     55 struct FstReadOptions {
     56   // FileReadMode(s) are advisory, there are many conditions than prevent a
     57   // file from being mapped, READ mode will be selected in these cases with
     58   // a warning indicating why it was chosen.
     59   enum FileReadMode { READ, MAP };
     60 
     61   string source;                // Where you're reading from
     62   const FstHeader *header;      // Pointer to Fst header. If non-zero, use
     63                                 // this info (don't read a stream header)
     64   const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
     65                                 // this info (read and skip stream isymbols)
     66   const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
     67                                 // this info (read and skip stream osymbols)
     68   FileReadMode mode;            // Read or map files (advisory, if possible)
     69 
     70   explicit FstReadOptions(const string& src = "<unspecified>",
     71                           const FstHeader *hdr = 0,
     72                           const SymbolTable* isym = 0,
     73                           const SymbolTable* osym = 0);
     74 
     75   explicit FstReadOptions(const string& src,
     76                           const SymbolTable* isym,
     77                           const SymbolTable* osym = 0);
     78 
     79   // Helper function to convert strings FileReadModes into their enum value.
     80   static FileReadMode ReadMode(const string &mode);
     81 };
     82 
     83 struct FstWriteOptions {
     84   string source;                 // Where you're writing to
     85   bool write_header;             // Write the header?
     86   bool write_isymbols;           // Write input symbols?
     87   bool write_osymbols;           // Write output symbols?
     88   bool align;                    // Write data aligned where appropriate;
     89                                  // this may fail on pipes
     90 
     91   explicit FstWriteOptions(const string& src = "<unspecifed>",
     92                            bool hdr = true, bool isym = true,
     93                            bool osym = true, bool alig = FLAGS_fst_align)
     94       : source(src), write_header(hdr),
     95         write_isymbols(isym), write_osymbols(osym), align(alig) {}
     96 };
     97 
     98 //
     99 // Fst HEADER CLASS
    100 //
    101 // This is the recommended Fst file header representation.
    102 //
    103 class FstHeader {
    104  public:
    105   enum {
    106     HAS_ISYMBOLS = 0x1,          // Has input symbol table
    107     HAS_OSYMBOLS = 0x2,          // Has output symbol table
    108     IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
    109   } Flags;
    110 
    111   FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
    112                 numstates_(0), numarcs_(0) {}
    113   const string &FstType() const { return fsttype_; }
    114   const string &ArcType() const { return arctype_; }
    115   int32 Version() const { return version_; }
    116   int32 GetFlags() const { return flags_; }
    117   uint64 Properties() const { return properties_; }
    118   int64 Start() const { return start_; }
    119   int64 NumStates() const { return numstates_; }
    120   int64 NumArcs() const { return numarcs_; }
    121 
    122   void SetFstType(const string& type) { fsttype_ = type; }
    123   void SetArcType(const string& type) { arctype_ = type; }
    124   void SetVersion(int32 version) { version_ = version; }
    125   void SetFlags(int32 flags) { flags_ = flags; }
    126   void SetProperties(uint64 properties) { properties_ = properties; }
    127   void SetStart(int64 start) { start_ = start; }
    128   void SetNumStates(int64 numstates) { numstates_ = numstates; }
    129   void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
    130 
    131   bool Read(istream &strm, const string &source, bool rewind = false);
    132   bool Write(ostream &strm, const string &source) const;
    133 
    134  private:
    135 
    136   string fsttype_;                   // E.g. "vector"
    137   string arctype_;                   // E.g. "standard"
    138   int32 version_;                    // Type version #
    139   int32 flags_;                      // File format bits
    140   uint64 properties_;                // FST property bits
    141   int64 start_;                      // Start state
    142   int64 numstates_;                  // # of states
    143   int64 numarcs_;                    // # of arcs
    144 };
    145 
    146 
    147 // Specifies matcher action.
    148 enum MatchType { MATCH_INPUT,      // Match input label.
    149                  MATCH_OUTPUT,     // Match output label.
    150                  MATCH_BOTH,       // Match input or output label.
    151                  MATCH_NONE,       // Match nothing.
    152                  MATCH_UNKNOWN };  // Match type unknown.
    153 
    154 //
    155 // Fst INTERFACE CLASS DEFINITION
    156 //
    157 
    158 // A generic FST, templated on the arc definition, with
    159 // common-demoninator methods (use StateIterator and ArcIterator to
    160 // iterate over its states and arcs).
    161 template <class A>
    162 class Fst {
    163  public:
    164   typedef A Arc;
    165   typedef typename A::Weight Weight;
    166   typedef typename A::StateId StateId;
    167 
    168   virtual ~Fst() {}
    169 
    170   virtual StateId Start() const = 0;          // Initial state
    171 
    172   virtual Weight Final(StateId) const = 0;    // State's final weight
    173 
    174   virtual size_t NumArcs(StateId) const = 0;  // State's arc count
    175 
    176   virtual size_t NumInputEpsilons(StateId)
    177       const = 0;                              // State's input epsilon count
    178 
    179   virtual size_t NumOutputEpsilons(StateId)
    180       const = 0;                              // State's output epsilon count
    181 
    182   // If test=false, return stored properties bits for mask (some poss. unknown)
    183   // If test=true, return property bits for mask (computing o.w. unknown)
    184   virtual uint64 Properties(uint64 mask, bool test)
    185       const = 0;  // Property bits
    186 
    187   virtual const string& Type() const = 0;    // Fst type name
    188 
    189   // Get a copy of this Fst. The copying behaves as follows:
    190   //
    191   // (1) The copying is constant time if safe = false or if safe = true
    192   // and is on an otherwise unaccessed Fst.
    193   //
    194   // (2) If safe = true, the copy is thread-safe in that the original
    195   // and copy can be safely accessed (but not necessarily mutated) by
    196   // separate threads. For some Fst types, 'Copy(true)' should only be
    197   // called on an Fst that has not otherwise been accessed. Its behavior
    198   // is undefined otherwise.
    199   //
    200   // (3) If a MutableFst is copied and then mutated, then the original is
    201   // unmodified and vice versa (often by a copy-on-write on the initial
    202   // mutation, which may not be constant time).
    203   virtual Fst<A> *Copy(bool safe = false) const = 0;
    204 
    205   // Read an Fst from an input stream; returns NULL on error
    206   static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
    207     FstReadOptions ropts(opts);
    208     FstHeader hdr;
    209     if (ropts.header)
    210       hdr = *opts.header;
    211     else {
    212       if (!hdr.Read(strm, opts.source))
    213         return 0;
    214       ropts.header = &hdr;
    215     }
    216     FstRegister<A> *registr = FstRegister<A>::GetRegister();
    217     const typename FstRegister<A>::Reader reader =
    218       registr->GetReader(hdr.FstType());
    219     if (!reader) {
    220       LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
    221                  << "\" (arc type = \"" << A::Type()
    222                  << "\"): " << ropts.source;
    223       return 0;
    224     }
    225     return reader(strm, ropts);
    226   };
    227 
    228   // Read an Fst from a file; return NULL on error
    229   // Empty filename reads from standard input
    230   static Fst<A> *Read(const string &filename) {
    231     if (!filename.empty()) {
    232       ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
    233       if (!strm) {
    234         LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
    235         return 0;
    236       }
    237       return Read(strm, FstReadOptions(filename));
    238     } else {
    239       return Read(cin, FstReadOptions("standard input"));
    240     }
    241   }
    242 
    243   // Write an Fst to an output stream; return false on error
    244   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
    245     LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
    246                << " Fst type";
    247     return false;
    248   }
    249 
    250   // Write an Fst to a file; return false on error
    251   // Empty filename writes to standard output
    252   virtual bool Write(const string &filename) const {
    253     LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
    254                << " Fst type";
    255     return false;
    256   }
    257 
    258   // Return input label symbol table; return NULL if not specified
    259   virtual const SymbolTable* InputSymbols() const = 0;
    260 
    261   // Return output label symbol table; return NULL if not specified
    262   virtual const SymbolTable* OutputSymbols() const = 0;
    263 
    264   // For generic state iterator construction; not normally called
    265   // directly by users.
    266   virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
    267 
    268   // For generic arc iterator construction; not normally called
    269   // directly by users.
    270   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
    271 
    272   // For generic matcher construction; not normally called
    273   // directly by users.
    274   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
    275 
    276  protected:
    277   bool WriteFile(const string &filename) const {
    278     if (!filename.empty()) {
    279       ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
    280       if (!strm) {
    281         LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
    282         return false;
    283       }
    284       return Write(strm, FstWriteOptions(filename));
    285     } else {
    286       return Write(cout, FstWriteOptions("standard output"));
    287     }
    288   }
    289 };
    290 
    291 
    292 //
    293 // STATE and ARC ITERATOR DEFINITIONS
    294 //
    295 
    296 // State iterator interface templated on the Arc definition; used
    297 // for StateIterator specializations returned by the InitStateIterator
    298 // Fst method.
    299 template <class A>
    300 class StateIteratorBase {
    301  public:
    302   typedef A Arc;
    303   typedef typename A::StateId StateId;
    304 
    305   virtual ~StateIteratorBase() {}
    306 
    307   bool Done() const { return Done_(); }       // End of iterator?
    308   StateId Value() const { return Value_(); }  // Current state (when !Done)
    309   void Next() { Next_(); }      // Advance to next state (when !Done)
    310   void Reset() { Reset_(); }    // Return to initial condition
    311 
    312  private:
    313   // This allows base class virtual access to non-virtual derived-
    314   // class members of the same name. It makes the derived class more
    315   // efficient to use but unsafe to further derive.
    316   virtual bool Done_() const = 0;
    317   virtual StateId Value_() const = 0;
    318   virtual void Next_() = 0;
    319   virtual void Reset_() = 0;
    320 };
    321 
    322 
    323 // StateIterator initialization data
    324 
    325 template <class A> struct StateIteratorData {
    326   StateIteratorBase<A> *base;   // Specialized iterator if non-zero
    327   typename A::StateId nstates;  // O.w. total # of states
    328 };
    329 
    330 
    331 // Generic state iterator, templated on the FST definition
    332 // - a wrapper around pointer to specific one.
    333 // Here is a typical use: \code
    334 //   for (StateIterator<StdFst> siter(fst);
    335 //        !siter.Done();
    336 //        siter.Next()) {
    337 //     StateId s = siter.Value();
    338 //     ...
    339 //   } \endcode
    340 template <class F>
    341 class StateIterator {
    342  public:
    343   typedef F FST;
    344   typedef typename F::Arc Arc;
    345   typedef typename Arc::StateId StateId;
    346 
    347   explicit StateIterator(const F &fst) : s_(0) {
    348     fst.InitStateIterator(&data_);
    349   }
    350 
    351   ~StateIterator() { if (data_.base) delete data_.base; }
    352 
    353   bool Done() const {
    354     return data_.base ? data_.base->Done() : s_ >= data_.nstates;
    355   }
    356 
    357   StateId Value() const { return data_.base ? data_.base->Value() : s_; }
    358 
    359   void Next() {
    360     if (data_.base)
    361       data_.base->Next();
    362     else
    363       ++s_;
    364   }
    365 
    366   void Reset() {
    367     if (data_.base)
    368       data_.base->Reset();
    369     else
    370       s_ = 0;
    371   }
    372 
    373  private:
    374   StateIteratorData<Arc> data_;
    375   StateId s_;
    376 
    377   DISALLOW_COPY_AND_ASSIGN(StateIterator);
    378 };
    379 
    380 
    381 // Flags to control the behavior on an arc iterator:
    382 static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
    383 static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
    384 static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
    385 static const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
    386 static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
    387 
    388 static const uint32 kArcValueFlags =
    389                   kArcILabelValue | kArcOLabelValue |
    390                   kArcWeightValue | kArcNextStateValue;
    391 
    392 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
    393 
    394 
    395 // Arc iterator interface, templated on the Arc definition; used
    396 // for Arc iterator specializations that are returned by the InitArcIterator
    397 // Fst method.
    398 template <class A>
    399 class ArcIteratorBase {
    400  public:
    401   typedef A Arc;
    402   typedef typename A::StateId StateId;
    403 
    404   virtual ~ArcIteratorBase() {}
    405 
    406   bool Done() const { return Done_(); }            // End of iterator?
    407   const A& Value() const { return Value_(); }      // Current arc (when !Done)
    408   void Next() { Next_(); }           // Advance to next arc (when !Done)
    409   size_t Position() const { return Position_(); }  // Return current position
    410   void Reset() { Reset_(); }         // Return to initial condition
    411   void Seek(size_t a) { Seek_(a); }  // Random arc access by position
    412   uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
    413   void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
    414     SetFlags_(flags, mask);
    415   }
    416 
    417  private:
    418   // This allows base class virtual access to non-virtual derived-
    419   // class members of the same name. It makes the derived class more
    420   // efficient to use but unsafe to further derive.
    421   virtual bool Done_() const = 0;
    422   virtual const A& Value_() const = 0;
    423   virtual void Next_() = 0;
    424   virtual size_t Position_() const = 0;
    425   virtual void Reset_() = 0;
    426   virtual void Seek_(size_t a) = 0;
    427   virtual uint32 Flags_() const = 0;
    428   virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
    429 };
    430 
    431 
    432 // ArcIterator initialization data
    433 template <class A> struct ArcIteratorData {
    434   ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
    435   const A *arcs;             // O.w. arcs pointer
    436   size_t narcs;              // ... and arc count
    437   int *ref_count;            // ... and reference count if non-zero
    438 };
    439 
    440 
    441 // Generic arc iterator, templated on the FST definition
    442 // - a wrapper around pointer to specific one.
    443 // Here is a typical use: \code
    444 //   for (ArcIterator<StdFst> aiter(fst, s));
    445 //        !aiter.Done();
    446 //         aiter.Next()) {
    447 //     StdArc &arc = aiter.Value();
    448 //     ...
    449 //   } \endcode
    450 template <class F>
    451 class ArcIterator {
    452    public:
    453   typedef F FST;
    454   typedef typename F::Arc Arc;
    455   typedef typename Arc::StateId StateId;
    456 
    457   ArcIterator(const F &fst, StateId s) : i_(0) {
    458     fst.InitArcIterator(s, &data_);
    459   }
    460 
    461   explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
    462     if (data_.ref_count)
    463       ++(*data_.ref_count);
    464   }
    465 
    466   ~ArcIterator() {
    467     if (data_.base)
    468       delete data_.base;
    469     else if (data_.ref_count)
    470       --(*data_.ref_count);
    471   }
    472 
    473   bool Done() const {
    474     return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
    475   }
    476 
    477   const Arc& Value() const {
    478     return data_.base ? data_.base->Value() : data_.arcs[i_];
    479   }
    480 
    481   void Next() {
    482     if (data_.base)
    483       data_.base->Next();
    484     else
    485       ++i_;
    486   }
    487 
    488   void Reset() {
    489     if (data_.base)
    490       data_.base->Reset();
    491     else
    492       i_ = 0;
    493   }
    494 
    495   void Seek(size_t a) {
    496     if (data_.base)
    497       data_.base->Seek(a);
    498     else
    499       i_ = a;
    500   }
    501 
    502   size_t Position() const {
    503     return data_.base ? data_.base->Position() : i_;
    504   }
    505 
    506   uint32 Flags() const {
    507     if (data_.base)
    508       return data_.base->Flags();
    509     else
    510       return kArcValueFlags;
    511   }
    512 
    513   void SetFlags(uint32 flags, uint32 mask) {
    514     if (data_.base)
    515       data_.base->SetFlags(flags, mask);
    516   }
    517 
    518  private:
    519   ArcIteratorData<Arc> data_;
    520   size_t i_;
    521   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
    522 };
    523 
    524 //
    525 // MATCHER DEFINITIONS
    526 //
    527 
    528 template <class A>
    529 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
    530   return 0;  // Use the default matcher
    531 }
    532 
    533 
    534 //
    535 // FST ACCESSORS - Useful functions in high-performance cases.
    536 //
    537 
    538 namespace internal {
    539 
    540 // General case - requires non-abstract, 'final' methods. Use for inlining.
    541 template <class F> inline
    542 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
    543   return fst.F::Final(s);
    544 }
    545 
    546 template <class F> inline
    547 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
    548   return fst.F::NumArcs(s);
    549 }
    550 
    551 template <class F> inline
    552 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
    553   return fst.F::NumInputEpsilons(s);
    554 }
    555 
    556 template <class F> inline
    557 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
    558   return fst.F::NumOutputEpsilons(s);
    559 }
    560 
    561 
    562 //  Fst<A> case - abstract methods.
    563 template <class A> inline
    564 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
    565   return fst.Final(s);
    566 }
    567 
    568 template <class A> inline
    569 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
    570   return fst.NumArcs(s);
    571 }
    572 
    573 template <class A> inline
    574 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
    575   return fst.NumInputEpsilons(s);
    576 }
    577 
    578 template <class A> inline
    579 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
    580   return fst.NumOutputEpsilons(s);
    581 }
    582 
    583 }  // namespace internal
    584 
    585 // A useful alias when using StdArc.
    586 typedef Fst<StdArc> StdFst;
    587 
    588 
    589 //
    590 //  CONSTANT DEFINITIONS
    591 //
    592 
    593 const int kNoStateId   =  -1;  // Not a valid state ID
    594 const int kNoLabel     =  -1;  // Not a valid label
    595 
    596 //
    597 // Fst IMPLEMENTATION BASE
    598 //
    599 // This is the recommended Fst implementation base class. It will
    600 // handle reference counts, property bits, type information and symbols.
    601 //
    602 
    603 template <class A> class FstImpl {
    604  public:
    605   typedef typename A::Weight Weight;
    606   typedef typename A::StateId StateId;
    607 
    608   FstImpl()
    609       : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
    610 
    611   FstImpl(const FstImpl<A> &impl)
    612       : properties_(impl.properties_), type_(impl.type_),
    613         isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
    614         osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
    615 
    616   virtual ~FstImpl() {
    617     delete isymbols_;
    618     delete osymbols_;
    619   }
    620 
    621   const string& Type() const { return type_; }
    622 
    623   void SetType(const string &type) { type_ = type; }
    624 
    625   virtual uint64 Properties() const { return properties_; }
    626 
    627   virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
    628 
    629   void SetProperties(uint64 props) {
    630     properties_ &= kError;          // kError can't be cleared
    631     properties_ |= props;
    632   }
    633 
    634   void SetProperties(uint64 props, uint64 mask) {
    635     properties_ &= ~mask | kError;  // kError can't be cleared
    636     properties_ |= props & mask;
    637   }
    638 
    639   // Allows (only) setting error bit on const FST impls
    640   void SetProperties(uint64 props, uint64 mask) const {
    641     if (mask != kError)
    642       FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
    643     properties_ |= kError;
    644   }
    645 
    646   const SymbolTable* InputSymbols() const { return isymbols_; }
    647 
    648   const SymbolTable* OutputSymbols() const { return osymbols_; }
    649 
    650   SymbolTable* InputSymbols() { return isymbols_; }
    651 
    652   SymbolTable* OutputSymbols() { return osymbols_; }
    653 
    654   void SetInputSymbols(const SymbolTable* isyms) {
    655     if (isymbols_) delete isymbols_;
    656     isymbols_ = isyms ? isyms->Copy() : 0;
    657   }
    658 
    659   void SetOutputSymbols(const SymbolTable* osyms) {
    660     if (osymbols_) delete osymbols_;
    661     osymbols_ = osyms ? osyms->Copy() : 0;
    662   }
    663 
    664   int RefCount() const {
    665     return ref_count_.count();
    666   }
    667 
    668   int IncrRefCount() {
    669     return ref_count_.Incr();
    670   }
    671 
    672   int DecrRefCount() {
    673     return ref_count_.Decr();
    674   }
    675 
    676   // Read-in header and symbols from input stream, initialize Fst, and
    677   // return the header.  If opts.header is non-null, skip read-in and
    678   // use the option value.  If opts.[io]symbols is non-null, read-in
    679   // (if present), but use the option value.
    680   bool ReadHeader(istream &strm, const FstReadOptions& opts,
    681                   int min_version, FstHeader *hdr);
    682 
    683   // Write-out header and symbols from output stream.
    684   // If a opts.header is false, skip writing header.
    685   // If opts.[io]symbols is false, skip writing those symbols.
    686   // This method is needed for Impl's that implement Write methods.
    687   void WriteHeader(ostream &strm, const FstWriteOptions& opts,
    688                    int version, FstHeader *hdr) const {
    689     if (opts.write_header) {
    690       hdr->SetFstType(type_);
    691       hdr->SetArcType(A::Type());
    692       hdr->SetVersion(version);
    693       hdr->SetProperties(properties_);
    694       int32 file_flags = 0;
    695       if (isymbols_ && opts.write_isymbols)
    696         file_flags |= FstHeader::HAS_ISYMBOLS;
    697       if (osymbols_ && opts.write_osymbols)
    698         file_flags |= FstHeader::HAS_OSYMBOLS;
    699       if (opts.align)
    700         file_flags |= FstHeader::IS_ALIGNED;
    701       hdr->SetFlags(file_flags);
    702       hdr->Write(strm, opts.source);
    703     }
    704     if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
    705     if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
    706   }
    707 
    708   // Write-out header and symbols to output stream.
    709   // If a opts.header is false, skip writing header.
    710   // If opts.[io]symbols is false, skip writing those symbols.
    711   // type is the Fst type being written.
    712   // This method is used in the cross-type serialization methods Fst::WriteFst.
    713   static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
    714                              const FstWriteOptions& opts, int version,
    715                              const string &type, uint64 properties,
    716                              FstHeader *hdr) {
    717     if (opts.write_header) {
    718       hdr->SetFstType(type);
    719       hdr->SetArcType(A::Type());
    720       hdr->SetVersion(version);
    721       hdr->SetProperties(properties);
    722       int32 file_flags = 0;
    723       if (fst.InputSymbols() && opts.write_isymbols)
    724         file_flags |= FstHeader::HAS_ISYMBOLS;
    725       if (fst.OutputSymbols() && opts.write_osymbols)
    726         file_flags |= FstHeader::HAS_OSYMBOLS;
    727       if (opts.align)
    728         file_flags |= FstHeader::IS_ALIGNED;
    729       hdr->SetFlags(file_flags);
    730       hdr->Write(strm, opts.source);
    731     }
    732     if (fst.InputSymbols() && opts.write_isymbols) {
    733       fst.InputSymbols()->Write(strm);
    734     }
    735     if (fst.OutputSymbols() && opts.write_osymbols) {
    736       fst.OutputSymbols()->Write(strm);
    737     }
    738   }
    739 
    740   // In serialization routines where the header cannot be written until after
    741   // the machine has been serialized, this routine can be called to seek to
    742   // the beginning of the file an rewrite the header with updated fields.
    743   // It repositions the file pointer back at the end of the file.
    744   // returns true on success, false on failure.
    745   static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
    746                               const FstWriteOptions& opts, int version,
    747                               const string &type, uint64 properties,
    748                               FstHeader *hdr, size_t header_offset) {
    749     strm.seekp(header_offset);
    750     if (!strm) {
    751       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    752       return false;
    753     }
    754     WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
    755     if (!strm) {
    756       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    757       return false;
    758     }
    759     strm.seekp(0, ios_base::end);
    760     if (!strm) {
    761       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    762       return false;
    763     }
    764     return true;
    765   }
    766 
    767  protected:
    768   mutable uint64 properties_;           // Property bits
    769 
    770  private:
    771   string type_;                 // Unique name of Fst class
    772   SymbolTable *isymbols_;       // Ilabel symbol table
    773   SymbolTable *osymbols_;       // Olabel symbol table
    774   RefCounter ref_count_;        // Reference count
    775 
    776   void operator=(const FstImpl<A> &impl);  // disallow
    777 };
    778 
    779 template <class A> inline
    780 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
    781                             int min_version, FstHeader *hdr) {
    782   if (opts.header)
    783     *hdr = *opts.header;
    784   else if (!hdr->Read(strm, opts.source))
    785     return false;
    786 
    787   if (FLAGS_v >= 2) {
    788     LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
    789               << ", fst_type: " << hdr->FstType()
    790               << ", arc_type: " << A::Type()
    791               << ", version: " << hdr->Version()
    792               << ", flags: " << hdr->GetFlags();
    793   }
    794 
    795   if (hdr->FstType() != type_) {
    796     LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
    797                << "\": " << opts.source;
    798     return false;
    799   }
    800   if (hdr->ArcType() != A::Type()) {
    801     LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
    802                << "\": " << opts.source;
    803     return false;
    804   }
    805   if (hdr->Version() < min_version) {
    806     LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
    807                << " Fst version: " << opts.source;
    808     return false;
    809   }
    810   properties_ = hdr->Properties();
    811   if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
    812     isymbols_ = SymbolTable::Read(strm, opts.source);
    813   if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
    814     osymbols_ =SymbolTable::Read(strm, opts.source);
    815 
    816   if (opts.isymbols) {
    817     delete isymbols_;
    818     isymbols_ = opts.isymbols->Copy();
    819   }
    820   if (opts.osymbols) {
    821     delete osymbols_;
    822     osymbols_ = opts.osymbols->Copy();
    823   }
    824   return true;
    825 }
    826 
    827 
    828 template<class Arc>
    829 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
    830 
    831 
    832 // This is a helper class template useful for attaching an Fst interface to
    833 // its implementation, handling reference counting.
    834 template < class I, class F = Fst<typename I::Arc> >
    835 class ImplToFst : public F {
    836  public:
    837   typedef typename I::Arc Arc;
    838   typedef typename Arc::Weight Weight;
    839   typedef typename Arc::StateId StateId;
    840 
    841   virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
    842 
    843   virtual StateId Start() const { return impl_->Start(); }
    844 
    845   virtual Weight Final(StateId s) const { return impl_->Final(s); }
    846 
    847   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
    848 
    849   virtual size_t NumInputEpsilons(StateId s) const {
    850     return impl_->NumInputEpsilons(s);
    851   }
    852 
    853   virtual size_t NumOutputEpsilons(StateId s) const {
    854     return impl_->NumOutputEpsilons(s);
    855   }
    856 
    857   virtual uint64 Properties(uint64 mask, bool test) const {
    858     if (test) {
    859       uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
    860       impl_->SetProperties(testprops, knownprops);
    861       return testprops & mask;
    862     } else {
    863       return impl_->Properties(mask);
    864     }
    865   }
    866 
    867   virtual const string& Type() const { return impl_->Type(); }
    868 
    869   virtual const SymbolTable* InputSymbols() const {
    870     return impl_->InputSymbols();
    871   }
    872 
    873   virtual const SymbolTable* OutputSymbols() const {
    874     return impl_->OutputSymbols();
    875   }
    876 
    877  protected:
    878   ImplToFst() : impl_(0) {}
    879 
    880   ImplToFst(I *impl) : impl_(impl) {}
    881 
    882   ImplToFst(const ImplToFst<I, F> &fst) {
    883     impl_ = fst.impl_;
    884     impl_->IncrRefCount();
    885   }
    886 
    887   // This constructor presumes there is a copy constructor for the
    888   // implementation.
    889   ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
    890     if (safe) {
    891       impl_ = new I(*(fst.impl_));
    892     } else {
    893       impl_ = fst.impl_;
    894       impl_->IncrRefCount();
    895     }
    896   }
    897 
    898   I *GetImpl() const { return impl_; }
    899 
    900   // Change Fst implementation pointer. If 'own_impl' is true,
    901   // ownership of the input implementation is given to this
    902   // object; otherwise, the input implementation's reference count
    903   // should be incremented.
    904   void SetImpl(I *impl, bool own_impl = true) {
    905     if (!own_impl)
    906       impl->IncrRefCount();
    907     if (impl_ && !impl_->DecrRefCount()) delete impl_;
    908     impl_ = impl;
    909   }
    910 
    911  private:
    912   // Disallow
    913   ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
    914 
    915   ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
    916     FSTERROR() << "ImplToFst: Assignment operator disallowed";
    917     GetImpl()->SetProperties(kError, kError);
    918     return *this;
    919   }
    920 
    921   I *impl_;
    922 };
    923 
    924 
    925 // Converts FSTs by casting their implementations, where this makes
    926 // sense (which excludes implementations with weight-dependent virtual
    927 // methods). Must be a friend of the Fst classes involved (currently
    928 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
    929 template<class F, class G> void Cast(const F &ifst, G *ofst) {
    930   ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
    931 }
    932 
    933 // Fst Serialization
    934 template <class A>
    935 void FstToString(const Fst<A> &fst, string *result) {
    936   ostringstream ostrm;
    937   fst.Write(ostrm, FstWriteOptions("FstToString"));
    938   *result = ostrm.str();
    939 }
    940 
    941 template <class A>
    942 Fst<A> *StringToFst(const string &s) {
    943   istringstream istrm(s);
    944   return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
    945 }
    946 
    947 }  // namespace fst
    948 
    949 #endif  // FST_LIB_FST_H__
    950