Home | History | Annotate | Download | only in far
      1 // far.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Finite-State Transducer (FST) archive classes.
     20 //
     21 
     22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
     23 #define FST_EXTENSIONS_FAR_FAR_H__
     24 
     25 #include <fst/extensions/far/stlist.h>
     26 #include <fst/extensions/far/sttable.h>
     27 #include <fst/fst.h>
     28 #include <fst/vector-fst.h>
     29 
     30 namespace fst {
     31 
     32 enum FarEntryType { FET_LINE, FET_FILE };
     33 enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
     34 
     35 // FST archive header class
     36 class FarHeader {
     37  public:
     38   const string &FarType() const { return fartype_; }
     39   const string &ArcType() const { return arctype_; }
     40 
     41   bool Read(const string &filename) {
     42     FstHeader fsthdr;
     43     if (filename.empty()) {  // Header reading unsupported on stdin.
     44       return false;
     45     } else if (IsSTTable(filename)) {  // Check if STTable
     46       ReadSTTableHeader(filename, &fsthdr);
     47       fartype_ = "sttable";
     48       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
     49       return true;
     50     } else if (IsSTList(filename)) {  // Check if STList
     51       ReadSTListHeader(filename, &fsthdr);
     52       fartype_ = "sttable";
     53       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
     54       return true;
     55     }
     56     return false;
     57   }
     58 
     59  private:
     60   string fartype_;
     61   string arctype_;
     62 };
     63 
     64 enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
     65                FAR_SSTABLE = 3 };
     66 
     67 // This class creates an archive of FSTs.
     68 template <class A>
     69 class FarWriter {
     70  public:
     71   typedef A Arc;
     72 
     73   // Creates a new (empty) FST archive; returns NULL on error.
     74   static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
     75 
     76   // Adds an FST to the end of an archive. Keys must be non-empty and
     77   // in lexicographic order. FSTs must have a suitable write method.
     78   virtual void Add(const string &key, const Fst<A> &fst) = 0;
     79 
     80   virtual FarType Type() const = 0;
     81 
     82   virtual bool Error() const = 0;
     83 
     84   virtual ~FarWriter() {}
     85 
     86  protected:
     87   FarWriter() {}
     88 
     89  private:
     90   DISALLOW_COPY_AND_ASSIGN(FarWriter);
     91 };
     92 
     93 
     94 // This class iterates through an existing archive of FSTs.
     95 template <class A>
     96 class FarReader {
     97  public:
     98  typedef A Arc;
     99 
    100   // Opens an existing FST archive in a single file; returns NULL on error.
    101   // Sets current position to the beginning of the achive.
    102   static FarReader *Open(const string &filename);
    103 
    104   // Opens an existing FST archive in multiple files; returns NULL on error.
    105   // Sets current position to the beginning of the achive.
    106   static FarReader *Open(const vector<string> &filenames);
    107 
    108   // Resets current posision to beginning of archive.
    109   virtual void Reset() = 0;
    110 
    111   // Sets current position to first entry >= key.  Returns true if a match.
    112   virtual bool Find(const string &key) = 0;
    113 
    114   // Current position at end of archive?
    115   virtual bool Done() const = 0;
    116 
    117   // Move current position to next FST.
    118   virtual void Next() = 0;
    119 
    120   // Returns key at the current position. This reference is invalidated if
    121   // the current position in the archive is changed.
    122   virtual const string &GetKey() const = 0;
    123 
    124   // Returns FST at the current position. This reference is invalidated if
    125   // the current position in the archive is changed.
    126   virtual const Fst<A> &GetFst() const = 0;
    127 
    128   virtual FarType Type() const = 0;
    129 
    130   virtual bool Error() const = 0;
    131 
    132   virtual ~FarReader() {}
    133 
    134  protected:
    135   FarReader() {}
    136 
    137  private:
    138   DISALLOW_COPY_AND_ASSIGN(FarReader);
    139 };
    140 
    141 
    142 template <class A>
    143 class FstWriter {
    144  public:
    145   void operator()(ostream &strm, const Fst<A> &fst) const {
    146     fst.Write(strm, FstWriteOptions());
    147   }
    148 };
    149 
    150 
    151 template <class A>
    152 class STTableFarWriter : public FarWriter<A> {
    153  public:
    154   typedef A Arc;
    155 
    156   static STTableFarWriter *Create(const string filename) {
    157     STTableWriter<Fst<A>, FstWriter<A> > *writer =
    158         STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
    159     return new STTableFarWriter(writer);
    160   }
    161 
    162   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
    163 
    164   FarType Type() const { return FAR_STTABLE; }
    165 
    166   bool Error() const { return writer_->Error(); }
    167 
    168   ~STTableFarWriter() { delete writer_; }
    169 
    170  private:
    171   explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
    172       : writer_(writer) {}
    173 
    174  private:
    175   STTableWriter<Fst<A>, FstWriter<A> > *writer_;
    176 
    177   DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
    178 };
    179 
    180 
    181 template <class A>
    182 class STListFarWriter : public FarWriter<A> {
    183  public:
    184   typedef A Arc;
    185 
    186   static STListFarWriter *Create(const string filename) {
    187     STListWriter<Fst<A>, FstWriter<A> > *writer =
    188         STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
    189     return new STListFarWriter(writer);
    190   }
    191 
    192   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
    193 
    194   FarType Type() const { return FAR_STLIST; }
    195 
    196   bool Error() const { return writer_->Error(); }
    197 
    198   ~STListFarWriter() { delete writer_; }
    199 
    200  private:
    201   explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
    202       : writer_(writer) {}
    203 
    204  private:
    205   STListWriter<Fst<A>, FstWriter<A> > *writer_;
    206 
    207   DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
    208 };
    209 
    210 
    211 template <class A>
    212 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
    213   switch(type) {
    214     case FAR_DEFAULT:
    215       if (filename.empty())
    216         return STListFarWriter<A>::Create(filename);
    217     case FAR_STTABLE:
    218       return STTableFarWriter<A>::Create(filename);
    219       break;
    220     case FAR_STLIST:
    221       return STListFarWriter<A>::Create(filename);
    222       break;
    223     default:
    224       LOG(ERROR) << "FarWriter::Create: unknown far type";
    225       return 0;
    226   }
    227 }
    228 
    229 
    230 template <class A>
    231 class FstReader {
    232  public:
    233   Fst<A> *operator()(istream &strm) const {
    234     return Fst<A>::Read(strm, FstReadOptions());
    235   }
    236 };
    237 
    238 
    239 template <class A>
    240 class STTableFarReader : public FarReader<A> {
    241  public:
    242   typedef A Arc;
    243 
    244   static STTableFarReader *Open(const string &filename) {
    245     STTableReader<Fst<A>, FstReader<A> > *reader =
    246         STTableReader<Fst<A>, FstReader<A> >::Open(filename);
    247     // TODO: error check
    248     return new STTableFarReader(reader);
    249   }
    250 
    251   static STTableFarReader *Open(const vector<string> &filenames) {
    252     STTableReader<Fst<A>, FstReader<A> > *reader =
    253         STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
    254     // TODO: error check
    255     return new STTableFarReader(reader);
    256   }
    257 
    258   void Reset() { reader_->Reset(); }
    259 
    260   bool Find(const string &key) { return reader_->Find(key); }
    261 
    262   bool Done() const { return reader_->Done(); }
    263 
    264   void Next() { return reader_->Next(); }
    265 
    266   const string &GetKey() const { return reader_->GetKey(); }
    267 
    268   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
    269 
    270   FarType Type() const { return FAR_STTABLE; }
    271 
    272   bool Error() const { return reader_->Error(); }
    273 
    274   ~STTableFarReader() { delete reader_; }
    275 
    276  private:
    277   explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
    278       : reader_(reader) {}
    279 
    280  private:
    281   STTableReader<Fst<A>, FstReader<A> > *reader_;
    282 
    283   DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
    284 };
    285 
    286 
    287 template <class A>
    288 class STListFarReader : public FarReader<A> {
    289  public:
    290   typedef A Arc;
    291 
    292   static STListFarReader *Open(const string &filename) {
    293     STListReader<Fst<A>, FstReader<A> > *reader =
    294         STListReader<Fst<A>, FstReader<A> >::Open(filename);
    295     // TODO: error check
    296     return new STListFarReader(reader);
    297   }
    298 
    299   static STListFarReader *Open(const vector<string> &filenames) {
    300     STListReader<Fst<A>, FstReader<A> > *reader =
    301         STListReader<Fst<A>, FstReader<A> >::Open(filenames);
    302     // TODO: error check
    303     return new STListFarReader(reader);
    304   }
    305 
    306   void Reset() { reader_->Reset(); }
    307 
    308   bool Find(const string &key) { return reader_->Find(key); }
    309 
    310   bool Done() const { return reader_->Done(); }
    311 
    312   void Next() { return reader_->Next(); }
    313 
    314   const string &GetKey() const { return reader_->GetKey(); }
    315 
    316   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
    317 
    318   FarType Type() const { return FAR_STLIST; }
    319 
    320   bool Error() const { return reader_->Error(); }
    321 
    322   ~STListFarReader() { delete reader_; }
    323 
    324  private:
    325   explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
    326       : reader_(reader) {}
    327 
    328  private:
    329   STListReader<Fst<A>, FstReader<A> > *reader_;
    330 
    331   DISALLOW_COPY_AND_ASSIGN(STListFarReader);
    332 };
    333 
    334 
    335 template <class A>
    336 FarReader<A> *FarReader<A>::Open(const string &filename) {
    337   if (filename.empty())
    338     return STListFarReader<A>::Open(filename);
    339   else if (IsSTTable(filename))
    340     return STTableFarReader<A>::Open(filename);
    341   else if (IsSTList(filename))
    342     return STListFarReader<A>::Open(filename);
    343   return 0;
    344 }
    345 
    346 
    347 template <class A>
    348 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
    349   if (!filenames.empty() && filenames[0].empty())
    350     return STListFarReader<A>::Open(filenames);
    351   else if (!filenames.empty() && IsSTTable(filenames[0]))
    352     return STTableFarReader<A>::Open(filenames);
    353   else if (!filenames.empty() && IsSTList(filenames[0]))
    354     return STListFarReader<A>::Open(filenames);
    355   return 0;
    356 }
    357 
    358 }  // namespace fst
    359 
    360 #endif  // FST_EXTENSIONS_FAR_FAR_H__
    361