Home | History | Annotate | Download | only in fst
      1 // fst.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Finite-State Transducer (FST) - abstract base class definition,
     20 // state and arc iterator interface, and suggested base implementation.
     21 //
     22 
     23 #ifndef FST_LIB_FST_H__
     24 #define FST_LIB_FST_H__
     25 
     26 #include <stddef.h>
     27 #include <sys/types.h>
     28 #include <cmath>
     29 #include <string>
     30 
     31 #include <fst/compat.h>
     32 #include <fst/types.h>
     33 
     34 #include <fst/arc.h>
     35 #include <fst/properties.h>
     36 #include <fst/register.h>
     37 #include <iostream>
     38 #include <fstream>
     39 #include <fst/symbol-table.h>
     40 #include <fst/util.h>
     41 
     42 
     43 DECLARE_bool(fst_align);
     44 
     45 namespace fst {
     46 
     47 bool IsFstHeader(istream &, const string &);
     48 
     49 class FstHeader;
     50 template <class A> class StateIteratorData;
     51 template <class A> class ArcIteratorData;
     52 template <class A> class MatcherBase;
     53 
     54 struct FstReadOptions {
     55   string source;                // Where you're reading from
     56   const FstHeader *header;      // Pointer to Fst header. If non-zero, use
     57                                 // this info (don't read a stream header)
     58   const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
     59                                 // this info (read and skip stream isymbols)
     60   const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
     61                                 // this info (read and skip stream osymbols)
     62 
     63   explicit FstReadOptions(const string& src = "<unspecfied>",
     64                           const FstHeader *hdr = 0,
     65                           const SymbolTable* isym = 0,
     66                           const SymbolTable* osym = 0)
     67       : source(src), header(hdr), isymbols(isym), osymbols(osym) {}
     68 
     69   explicit FstReadOptions(const string& src,
     70                           const SymbolTable* isym,
     71                           const SymbolTable* osym = 0)
     72       : source(src), header(0), isymbols(isym), osymbols(osym) {}
     73 };
     74 
     75 
     76 struct FstWriteOptions {
     77   string source;                 // Where you're writing to
     78   bool write_header;             // Write the header?
     79   bool write_isymbols;           // Write input symbols?
     80   bool write_osymbols;           // Write output symbols?
     81   bool align;                    // Write data aligned where appropriate;
     82                                  // this may fail on pipes
     83 
     84   explicit FstWriteOptions(const string& src = "<unspecifed>",
     85                            bool hdr = true, bool isym = true,
     86                            bool osym = true, bool alig = FLAGS_fst_align)
     87       : source(src), write_header(hdr),
     88         write_isymbols(isym), write_osymbols(osym), align(alig) {}
     89 };
     90 
     91 //
     92 // Fst HEADER CLASS
     93 //
     94 // This is the recommended Fst file header representation.
     95 //
     96 class FstHeader {
     97  public:
     98   enum {
     99     HAS_ISYMBOLS = 0x1,          // Has input symbol table
    100     HAS_OSYMBOLS = 0x2,          // Has output symbol table
    101     IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
    102   } Flags;
    103 
    104   FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
    105                 numstates_(0), numarcs_(0) {}
    106   const string &FstType() const { return fsttype_; }
    107   const string &ArcType() const { return arctype_; }
    108   int32 Version() const { return version_; }
    109   int32 GetFlags() const { return flags_; }
    110   uint64 Properties() const { return properties_; }
    111   int64 Start() const { return start_; }
    112   int64 NumStates() const { return numstates_; }
    113   int64 NumArcs() const { return numarcs_; }
    114 
    115   void SetFstType(const string& type) { fsttype_ = type; }
    116   void SetArcType(const string& type) { arctype_ = type; }
    117   void SetVersion(int32 version) { version_ = version; }
    118   void SetFlags(int32 flags) { flags_ = flags; }
    119   void SetProperties(uint64 properties) { properties_ = properties; }
    120   void SetStart(int64 start) { start_ = start; }
    121   void SetNumStates(int64 numstates) { numstates_ = numstates; }
    122   void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
    123 
    124   bool Read(istream &strm, const string &source, bool rewind = false);
    125   bool Write(ostream &strm, const string &source) const;
    126 
    127  private:
    128 
    129   string fsttype_;                   // E.g. "vector"
    130   string arctype_;                   // E.g. "standard"
    131   int32 version_;                    // Type version #
    132   int32 flags_;                      // File format bits
    133   uint64 properties_;                // FST property bits
    134   int64 start_;                      // Start state
    135   int64 numstates_;                  // # of states
    136   int64 numarcs_;                    // # of arcs
    137 };
    138 
    139 
    140 // Specifies matcher action.
    141 enum MatchType { MATCH_INPUT,      // Match input label.
    142                  MATCH_OUTPUT,     // Match output label.
    143                  MATCH_BOTH,       // Match input or output label.
    144                  MATCH_NONE,       // Match nothing.
    145                  MATCH_UNKNOWN };  // Match type unknown.
    146 
    147 //
    148 // Fst INTERFACE CLASS DEFINITION
    149 //
    150 
    151 // A generic FST, templated on the arc definition, with
    152 // common-demoninator methods (use StateIterator and ArcIterator to
    153 // iterate over its states and arcs).
    154 template <class A>
    155 class Fst {
    156  public:
    157   typedef A Arc;
    158   typedef typename A::Weight Weight;
    159   typedef typename A::StateId StateId;
    160 
    161   virtual ~Fst() {}
    162 
    163   virtual StateId Start() const = 0;          // Initial state
    164 
    165   virtual Weight Final(StateId) const = 0;    // State's final weight
    166 
    167   virtual size_t NumArcs(StateId) const = 0;  // State's arc count
    168 
    169   virtual size_t NumInputEpsilons(StateId)
    170       const = 0;                              // State's input epsilon count
    171 
    172   virtual size_t NumOutputEpsilons(StateId)
    173       const = 0;                              // State's output epsilon count
    174 
    175   // If test=false, return stored properties bits for mask (some poss. unknown)
    176   // If test=true, return property bits for mask (computing o.w. unknown)
    177   virtual uint64 Properties(uint64 mask, bool test)
    178       const = 0;  // Property bits
    179 
    180   virtual const string& Type() const = 0;    // Fst type name
    181 
    182   // Get a copy of this Fst. The copying behaves as follows:
    183   //
    184   // (1) The copying is constant time if safe = false or if safe = true
    185   // and is on an otherwise unaccessed Fst.
    186   //
    187   // (2) If safe = true, the copy is thread-safe in that the original
    188   // and copy can be safely accessed (but not necessarily mutated) by
    189   // separate threads. For some Fst types, 'Copy(true)' should only be
    190   // called on an Fst that has not otherwise been accessed. Its behavior
    191   // is undefined otherwise.
    192   //
    193   // (3) If a MutableFst is copied and then mutated, then the original is
    194   // unmodified and vice versa (often by a copy-on-write on the initial
    195   // mutation, which may not be constant time).
    196   virtual Fst<A> *Copy(bool safe = false) const = 0;
    197 
    198   // Read an Fst from an input stream; returns NULL on error
    199   static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
    200     FstReadOptions ropts(opts);
    201     FstHeader hdr;
    202     if (ropts.header)
    203       hdr = *opts.header;
    204     else {
    205       if (!hdr.Read(strm, opts.source))
    206         return 0;
    207       ropts.header = &hdr;
    208     }
    209     FstRegister<A> *registr = FstRegister<A>::GetRegister();
    210     const typename FstRegister<A>::Reader reader =
    211       registr->GetReader(hdr.FstType());
    212     if (!reader) {
    213       LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
    214                  << "\" (arc type = \"" << A::Type()
    215                  << "\"): " << ropts.source;
    216       return 0;
    217     }
    218     return reader(strm, ropts);
    219   };
    220 
    221   // Read an Fst from a file; return NULL on error
    222   // Empty filename reads from standard input
    223   static Fst<A> *Read(const string &filename) {
    224     if (!filename.empty()) {
    225       ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
    226       if (!strm) {
    227         LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
    228         return 0;
    229       }
    230       return Read(strm, FstReadOptions(filename));
    231     } else {
    232       return Read(std::cin, FstReadOptions("standard input"));
    233     }
    234   }
    235 
    236   // Write an Fst to an output stream; return false on error
    237   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
    238     LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
    239                << " Fst type";
    240     return false;
    241   }
    242 
    243   // Write an Fst to a file; return false on error
    244   // Empty filename writes to standard output
    245   virtual bool Write(const string &filename) const {
    246     LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
    247                << " Fst type";
    248     return false;
    249   }
    250 
    251   // Return input label symbol table; return NULL if not specified
    252   virtual const SymbolTable* InputSymbols() const = 0;
    253 
    254   // Return output label symbol table; return NULL if not specified
    255   virtual const SymbolTable* OutputSymbols() const = 0;
    256 
    257   // For generic state iterator construction; not normally called
    258   // directly by users.
    259   virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
    260 
    261   // For generic arc iterator construction; not normally called
    262   // directly by users.
    263   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
    264 
    265   // For generic matcher construction; not normally called
    266   // directly by users.
    267   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
    268 
    269  protected:
    270 
    271   bool WriteFile(const string &filename) const {
    272     if (!filename.empty()) {
    273       ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
    274       if (!strm) {
    275         LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
    276         return false;
    277       }
    278       return Write(strm, FstWriteOptions(filename));
    279     } else {
    280       return Write(std::cout, FstWriteOptions("standard output"));
    281     }
    282   }
    283 };
    284 
    285 
    286 //
    287 // STATE and ARC ITERATOR DEFINITIONS
    288 //
    289 
    290 // State iterator interface templated on the Arc definition; used
    291 // for StateIterator specializations returned by the InitStateIterator
    292 // Fst method.
    293 template <class A>
    294 class StateIteratorBase {
    295  public:
    296   typedef A Arc;
    297   typedef typename A::StateId StateId;
    298 
    299   virtual ~StateIteratorBase() {}
    300 
    301   bool Done() const { return Done_(); }       // End of iterator?
    302   StateId Value() const { return Value_(); }  // Current state (when !Done)
    303   void Next() { Next_(); }      // Advance to next state (when !Done)
    304   void Reset() { Reset_(); }    // Return to initial condition
    305 
    306  private:
    307   // This allows base class virtual access to non-virtual derived-
    308   // class members of the same name. It makes the derived class more
    309   // efficient to use but unsafe to further derive.
    310   virtual bool Done_() const = 0;
    311   virtual StateId Value_() const = 0;
    312   virtual void Next_() = 0;
    313   virtual void Reset_() = 0;
    314 };
    315 
    316 
    317 // StateIterator initialization data
    318 
    319 template <class A> struct StateIteratorData {
    320   StateIteratorBase<A> *base;   // Specialized iterator if non-zero
    321   typename A::StateId nstates;  // O.w. total # of states
    322 };
    323 
    324 
    325 // Generic state iterator, templated on the FST definition
    326 // - a wrapper around pointer to specific one.
    327 // Here is a typical use: \code
    328 //   for (StateIterator<StdFst> siter(fst);
    329 //        !siter.Done();
    330 //        siter.Next()) {
    331 //     StateId s = siter.Value();
    332 //     ...
    333 //   } \endcode
    334 template <class F>
    335 class StateIterator {
    336  public:
    337   typedef F FST;
    338   typedef typename F::Arc Arc;
    339   typedef typename Arc::StateId StateId;
    340 
    341   explicit StateIterator(const F &fst) : s_(0) {
    342     fst.InitStateIterator(&data_);
    343   }
    344 
    345   ~StateIterator() { if (data_.base) delete data_.base; }
    346 
    347   bool Done() const {
    348     return data_.base ? data_.base->Done() : s_ >= data_.nstates;
    349   }
    350 
    351   StateId Value() const { return data_.base ? data_.base->Value() : s_; }
    352 
    353   void Next() {
    354     if (data_.base)
    355       data_.base->Next();
    356     else
    357       ++s_;
    358   }
    359 
    360   void Reset() {
    361     if (data_.base)
    362       data_.base->Reset();
    363     else
    364       s_ = 0;
    365   }
    366 
    367  private:
    368   StateIteratorData<Arc> data_;
    369   StateId s_;
    370 
    371   DISALLOW_COPY_AND_ASSIGN(StateIterator);
    372 };
    373 
    374 
    375 // Flags to control the behavior on an arc iterator:
    376 static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
    377 static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
    378 static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
    379 static const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
    380 static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
    381 
    382 static const uint32 kArcValueFlags =
    383                   kArcILabelValue | kArcOLabelValue |
    384                   kArcWeightValue | kArcNextStateValue;
    385 
    386 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
    387 
    388 
    389 // Arc iterator interface, templated on the Arc definition; used
    390 // for Arc iterator specializations that are returned by the InitArcIterator
    391 // Fst method.
    392 template <class A>
    393 class ArcIteratorBase {
    394  public:
    395   typedef A Arc;
    396   typedef typename A::StateId StateId;
    397 
    398   virtual ~ArcIteratorBase() {}
    399 
    400   bool Done() const { return Done_(); }            // End of iterator?
    401   const A& Value() const { return Value_(); }      // Current arc (when !Done)
    402   void Next() { Next_(); }           // Advance to next arc (when !Done)
    403   size_t Position() const { return Position_(); }  // Return current position
    404   void Reset() { Reset_(); }         // Return to initial condition
    405   void Seek(size_t a) { Seek_(a); }  // Random arc access by position
    406   uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
    407   void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
    408     SetFlags_(flags, mask);
    409   }
    410 
    411  private:
    412   // This allows base class virtual access to non-virtual derived-
    413   // class members of the same name. It makes the derived class more
    414   // efficient to use but unsafe to further derive.
    415   virtual bool Done_() const = 0;
    416   virtual const A& Value_() const = 0;
    417   virtual void Next_() = 0;
    418   virtual size_t Position_() const = 0;
    419   virtual void Reset_() = 0;
    420   virtual void Seek_(size_t a) = 0;
    421   virtual uint32 Flags_() const = 0;
    422   virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
    423 };
    424 
    425 
    426 // ArcIterator initialization data
    427 template <class A> struct ArcIteratorData {
    428   ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
    429   const A *arcs;             // O.w. arcs pointer
    430   size_t narcs;              // ... and arc count
    431   int *ref_count;            // ... and reference count if non-zero
    432 };
    433 
    434 
    435 // Generic arc iterator, templated on the FST definition
    436 // - a wrapper around pointer to specific one.
    437 // Here is a typical use: \code
    438 //   for (ArcIterator<StdFst> aiter(fst, s));
    439 //        !aiter.Done();
    440 //         aiter.Next()) {
    441 //     StdArc &arc = aiter.Value();
    442 //     ...
    443 //   } \endcode
    444 template <class F>
    445 class ArcIterator {
    446    public:
    447   typedef F FST;
    448   typedef typename F::Arc Arc;
    449   typedef typename Arc::StateId StateId;
    450 
    451   ArcIterator(const F &fst, StateId s) : i_(0) {
    452     fst.InitArcIterator(s, &data_);
    453   }
    454 
    455   explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
    456     if (data_.ref_count)
    457       ++(*data_.ref_count);
    458   }
    459 
    460   ~ArcIterator() {
    461     if (data_.base)
    462       delete data_.base;
    463     else if (data_.ref_count)
    464       --(*data_.ref_count);
    465   }
    466 
    467   bool Done() const {
    468     return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
    469   }
    470 
    471   const Arc& Value() const {
    472     return data_.base ? data_.base->Value() : data_.arcs[i_];
    473   }
    474 
    475   void Next() {
    476     if (data_.base)
    477       data_.base->Next();
    478     else
    479       ++i_;
    480   }
    481 
    482   void Reset() {
    483     if (data_.base)
    484       data_.base->Reset();
    485     else
    486       i_ = 0;
    487   }
    488 
    489   void Seek(size_t a) {
    490     if (data_.base)
    491       data_.base->Seek(a);
    492     else
    493       i_ = a;
    494   }
    495 
    496   size_t Position() const {
    497     return data_.base ? data_.base->Position() : i_;
    498   }
    499 
    500   uint32 Flags() const {
    501     if (data_.base)
    502       return data_.base->Flags();
    503     else
    504       return kArcValueFlags;
    505   }
    506 
    507   void SetFlags(uint32 flags, uint32 mask) {
    508     if (data_.base)
    509       data_.base->SetFlags(flags, mask);
    510   }
    511 
    512  private:
    513   ArcIteratorData<Arc> data_;
    514   size_t i_;
    515   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
    516 };
    517 
    518 //
    519 // MATCHER DEFINITIONS
    520 //
    521 
    522 template <class A>
    523 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
    524   return 0;  // Use the default matcher
    525 }
    526 
    527 
    528 //
    529 // FST ACCESSORS - Useful functions in high-performance cases.
    530 //
    531 
    532 namespace internal {
    533 
    534 // General case - requires non-abstract, 'final' methods. Use for inlining.
    535 template <class F> inline
    536 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
    537   return fst.F::Final(s);
    538 }
    539 
    540 template <class F> inline
    541 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
    542   return fst.F::NumArcs(s);
    543 }
    544 
    545 template <class F> inline
    546 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
    547   return fst.F::NumInputEpsilons(s);
    548 }
    549 
    550 template <class F> inline
    551 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
    552   return fst.F::NumOutputEpsilons(s);
    553 }
    554 
    555 
    556 //  Fst<A> case - abstract methods.
    557 template <class A> inline
    558 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
    559   return fst.Final(s);
    560 }
    561 
    562 template <class A> inline
    563 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
    564   return fst.NumArcs(s);
    565 }
    566 
    567 template <class A> inline
    568 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
    569   return fst.NumInputEpsilons(s);
    570 }
    571 
    572 template <class A> inline
    573 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
    574   return fst.NumOutputEpsilons(s);
    575 }
    576 
    577 }  // namespace internal
    578 
    579 // A useful alias when using StdArc.
    580 typedef Fst<StdArc> StdFst;
    581 
    582 
    583 //
    584 //  CONSTANT DEFINITIONS
    585 //
    586 
    587 const int kNoStateId   =  -1;  // Not a valid state ID
    588 const int kNoLabel     =  -1;  // Not a valid label
    589 
    590 //
    591 // Fst IMPLEMENTATION BASE
    592 //
    593 // This is the recommended Fst implementation base class. It will
    594 // handle reference counts, property bits, type information and symbols.
    595 //
    596 
    597 template <class A> class FstImpl {
    598  public:
    599   typedef typename A::Weight Weight;
    600   typedef typename A::StateId StateId;
    601 
    602   FstImpl()
    603       : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
    604 
    605   FstImpl(const FstImpl<A> &impl)
    606       : properties_(impl.properties_), type_(impl.type_),
    607         isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
    608         osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
    609 
    610   virtual ~FstImpl() {
    611     delete isymbols_;
    612     delete osymbols_;
    613   }
    614 
    615   const string& Type() const { return type_; }
    616 
    617   void SetType(const string &type) { type_ = type; }
    618 
    619   virtual uint64 Properties() const { return properties_; }
    620 
    621   virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
    622 
    623   void SetProperties(uint64 props) {
    624     properties_ &= kError;          // kError can't be cleared
    625     properties_ |= props;
    626   }
    627 
    628   void SetProperties(uint64 props, uint64 mask) {
    629     properties_ &= ~mask | kError;  // kError can't be cleared
    630     properties_ |= props & mask;
    631   }
    632 
    633   // Allows (only) setting error bit on const FST impls
    634   void SetProperties(uint64 props, uint64 mask) const {
    635     if (mask != kError)
    636       FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
    637     properties_ |= kError;
    638   }
    639 
    640   const SymbolTable* InputSymbols() const { return isymbols_; }
    641 
    642   const SymbolTable* OutputSymbols() const { return osymbols_; }
    643 
    644   SymbolTable* InputSymbols() { return isymbols_; }
    645 
    646   SymbolTable* OutputSymbols() { return osymbols_; }
    647 
    648   void SetInputSymbols(const SymbolTable* isyms) {
    649     if (isymbols_) delete isymbols_;
    650     isymbols_ = isyms ? isyms->Copy() : 0;
    651   }
    652 
    653   void SetOutputSymbols(const SymbolTable* osyms) {
    654     if (osymbols_) delete osymbols_;
    655     osymbols_ = osyms ? osyms->Copy() : 0;
    656   }
    657 
    658   int RefCount() const {
    659     return ref_count_.count();
    660   }
    661 
    662   int IncrRefCount() {
    663     return ref_count_.Incr();
    664   }
    665 
    666   int DecrRefCount() {
    667     return ref_count_.Decr();
    668   }
    669 
    670   // Read-in header and symbols from input stream, initialize Fst, and
    671   // return the header.  If opts.header is non-null, skip read-in and
    672   // use the option value.  If opts.[io]symbols is non-null, read-in
    673   // (if present), but use the option value.
    674   bool ReadHeader(istream &strm, const FstReadOptions& opts,
    675                   int min_version, FstHeader *hdr);
    676 
    677   // Write-out header and symbols from output stream.
    678   // If a opts.header is false, skip writing header.
    679   // If opts.[io]symbols is false, skip writing those symbols.
    680   // This method is needed for Impl's that implement Write methods.
    681   void WriteHeader(ostream &strm, const FstWriteOptions& opts,
    682                    int version, FstHeader *hdr) const {
    683     if (opts.write_header) {
    684       hdr->SetFstType(type_);
    685       hdr->SetArcType(A::Type());
    686       hdr->SetVersion(version);
    687       hdr->SetProperties(properties_);
    688       int32 file_flags = 0;
    689       if (isymbols_ && opts.write_isymbols)
    690         file_flags |= FstHeader::HAS_ISYMBOLS;
    691       if (osymbols_ && opts.write_osymbols)
    692         file_flags |= FstHeader::HAS_OSYMBOLS;
    693       if (opts.align)
    694         file_flags |= FstHeader::IS_ALIGNED;
    695       hdr->SetFlags(file_flags);
    696       hdr->Write(strm, opts.source);
    697     }
    698     if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
    699     if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
    700   }
    701 
    702   // Write-out header and symbols to output stream.
    703   // If a opts.header is false, skip writing header.
    704   // If opts.[io]symbols is false, skip writing those symbols.
    705   // type is the Fst type being written.
    706   // This method is used in the cross-type serialization methods Fst::WriteFst.
    707   static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
    708                              const FstWriteOptions& opts, int version,
    709                              const string &type, FstHeader *hdr) {
    710     if (opts.write_header) {
    711       hdr->SetFstType(type);
    712       hdr->SetArcType(A::Type());
    713       hdr->SetVersion(version);
    714       hdr->SetProperties(fst.Properties(kFstProperties, false));
    715       int32 file_flags = 0;
    716       if (fst.InputSymbols() && opts.write_isymbols)
    717         file_flags |= FstHeader::HAS_ISYMBOLS;
    718       if (fst.OutputSymbols() && opts.write_osymbols)
    719         file_flags |= FstHeader::HAS_OSYMBOLS;
    720       if (opts.align)
    721         file_flags |= FstHeader::IS_ALIGNED;
    722       hdr->SetFlags(file_flags);
    723       hdr->Write(strm, opts.source);
    724     }
    725     if (fst.InputSymbols() && opts.write_isymbols) {
    726       fst.InputSymbols()->Write(strm);
    727     }
    728     if (fst.OutputSymbols() && opts.write_osymbols) {
    729       fst.OutputSymbols()->Write(strm);
    730     }
    731   }
    732 
    733   // In serialization routines where the header cannot be written until after
    734   // the machine has been serialized, this routine can be called to seek to
    735   // the beginning of the file an rewrite the header with updated fields.
    736   // It repositions the file pointer back at the end of the file.
    737   // returns true on success, false on failure.
    738   static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
    739                               const FstWriteOptions& opts, int version,
    740                               const string &type, FstHeader *hdr,
    741                               size_t header_offset) {
    742     strm.seekp(header_offset);
    743     if (!strm) {
    744       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    745       return false;
    746     }
    747     WriteFstHeader(fst, strm, opts, version, type, hdr);
    748     if (!strm) {
    749       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    750       return false;
    751     }
    752     strm.seekp(0, ios_base::end);
    753     if (!strm) {
    754       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
    755       return false;
    756     }
    757     return true;
    758   }
    759 
    760  protected:
    761   mutable uint64 properties_;           // Property bits
    762 
    763  private:
    764   string type_;                 // Unique name of Fst class
    765   SymbolTable *isymbols_;       // Ilabel symbol table
    766   SymbolTable *osymbols_;       // Olabel symbol table
    767   RefCounter ref_count_;        // Reference count
    768 
    769   void operator=(const FstImpl<A> &impl);  // disallow
    770 };
    771 
    772 template <class A> inline
    773 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
    774                             int min_version, FstHeader *hdr) {
    775   if (opts.header)
    776     *hdr = *opts.header;
    777   else if (!hdr->Read(strm, opts.source))
    778     return false;
    779 
    780   if (FLAGS_v >= 2) {
    781     LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
    782               << ", fst_type: " << hdr->FstType()
    783               << ", arc_type: " << A::Type()
    784               << ", version: " << hdr->Version()
    785               << ", flags: " << hdr->GetFlags();
    786   }
    787 
    788   if (hdr->FstType() != type_) {
    789     LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
    790                << "\": " << opts.source;
    791     return false;
    792   }
    793   if (hdr->ArcType() != A::Type()) {
    794     LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
    795                << "\": " << opts.source;
    796     return false;
    797   }
    798   if (hdr->Version() < min_version) {
    799     LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
    800                << " Fst version: " << opts.source;
    801     return false;
    802   }
    803   properties_ = hdr->Properties();
    804   if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
    805     isymbols_ = SymbolTable::Read(strm, opts.source);
    806   if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
    807     osymbols_ =SymbolTable::Read(strm, opts.source);
    808 
    809   if (opts.isymbols) {
    810     delete isymbols_;
    811     isymbols_ = opts.isymbols->Copy();
    812   }
    813   if (opts.osymbols) {
    814     delete osymbols_;
    815     osymbols_ = opts.osymbols->Copy();
    816   }
    817   return true;
    818 }
    819 
    820 
    821 template<class Arc>
    822 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
    823 
    824 
    825 // This is a helper class template useful for attaching an Fst interface to
    826 // its implementation, handling reference counting.
    827 template < class I, class F = Fst<typename I::Arc> >
    828 class ImplToFst : public F {
    829  public:
    830   typedef typename I::Arc Arc;
    831   typedef typename Arc::Weight Weight;
    832   typedef typename Arc::StateId StateId;
    833 
    834   virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
    835 
    836   virtual StateId Start() const { return impl_->Start(); }
    837 
    838   virtual Weight Final(StateId s) const { return impl_->Final(s); }
    839 
    840   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
    841 
    842   virtual size_t NumInputEpsilons(StateId s) const {
    843     return impl_->NumInputEpsilons(s);
    844   }
    845 
    846   virtual size_t NumOutputEpsilons(StateId s) const {
    847     return impl_->NumOutputEpsilons(s);
    848   }
    849 
    850   virtual uint64 Properties(uint64 mask, bool test) const {
    851     if (test) {
    852       uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
    853       impl_->SetProperties(testprops, knownprops);
    854       return testprops & mask;
    855     } else {
    856       return impl_->Properties(mask);
    857     }
    858   }
    859 
    860   virtual const string& Type() const { return impl_->Type(); }
    861 
    862   virtual const SymbolTable* InputSymbols() const {
    863     return impl_->InputSymbols();
    864   }
    865 
    866   virtual const SymbolTable* OutputSymbols() const {
    867     return impl_->OutputSymbols();
    868   }
    869 
    870  protected:
    871   ImplToFst() : impl_(0) {}
    872 
    873   ImplToFst(I *impl) : impl_(impl) {}
    874 
    875   ImplToFst(const ImplToFst<I, F> &fst) {
    876     impl_ = fst.impl_;
    877     impl_->IncrRefCount();
    878   }
    879 
    880   // This constructor presumes there is a copy constructor for the
    881   // implementation.
    882   ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
    883     if (safe) {
    884       impl_ = new I(*(fst.impl_));
    885     } else {
    886       impl_ = fst.impl_;
    887       impl_->IncrRefCount();
    888     }
    889   }
    890 
    891   I *GetImpl() const { return impl_; }
    892 
    893   // Change Fst implementation pointer. If 'own_impl' is true,
    894   // ownership of the input implementation is given to this
    895   // object; otherwise, the input implementation's reference count
    896   // should be incremented.
    897   void SetImpl(I *impl, bool own_impl = true) {
    898     if (!own_impl)
    899       impl->IncrRefCount();
    900     if (impl_ && !impl_->DecrRefCount()) delete impl_;
    901     impl_ = impl;
    902   }
    903 
    904  private:
    905   // Disallow
    906   ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
    907 
    908   ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
    909     FSTERROR() << "ImplToFst: Assignment operator disallowed";
    910     GetImpl()->SetProperties(kError, kError);
    911     return *this;
    912   }
    913 
    914   I *impl_;
    915 };
    916 
    917 
    918 // Converts FSTs by casting their implementations, where this makes
    919 // sense (which excludes implementations with weight-dependent virtual
    920 // methods). Must be a friend of the Fst classes involved (currently
    921 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
    922 template<class F, class G> void Cast(const F &ifst, G *ofst) {
    923   ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
    924 }
    925 
    926 // Fst Serialization
    927 template <class A>
    928 void FstToString(const Fst<A> &fst, string *result) {
    929   ostringstream ostrm;
    930   fst.Write(ostrm, FstWriteOptions("FstToString"));
    931   *result = ostrm.str();
    932 }
    933 
    934 template <class A>
    935 Fst<A> *StringToFst(const string &s) {
    936   istringstream istrm(s);
    937   return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
    938 }
    939 
    940 }  // namespace fst
    941 
    942 #endif  // FST_LIB_FST_H__
    943