Home | History | Annotate | Download | only in far
      1 // far.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Finite-State Transducer (FST) archive classes.
     20 //
     21 
     22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
     23 #define FST_EXTENSIONS_FAR_FAR_H__
     24 
     25 #include <fst/extensions/far/stlist.h>
     26 #include <fst/extensions/far/sttable.h>
     27 #include <fst/fst.h>
     28 #include <fst/vector-fst.h>
     29 
     30 namespace fst {
     31 
     32 enum FarEntryType { FET_LINE, FET_FILE };
     33 enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
     34 
     35 inline bool IsFst(const string &filename) {
     36   ifstream strm(filename.c_str());
     37   if (!strm)
     38     return false;
     39   return IsFstHeader(strm, filename);
     40 }
     41 
     42 // FST archive header class
     43 class FarHeader {
     44  public:
     45   const string &FarType() const { return fartype_; }
     46   const string &ArcType() const { return arctype_; }
     47 
     48   bool Read(const string &filename) {
     49     FstHeader fsthdr;
     50     if (filename.empty()) {
     51       // Header reading unsupported on stdin. Assumes STList and StdArc.
     52       fartype_ = "stlist";
     53       arctype_ = "standard";
     54       return true;
     55     } else if (IsSTTable(filename)) {  // Check if STTable
     56       ReadSTTableHeader(filename, &fsthdr);
     57       fartype_ = "sttable";
     58       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
     59       return true;
     60     } else if (IsSTList(filename)) {  // Check if STList
     61       ReadSTListHeader(filename, &fsthdr);
     62       fartype_ = "sttable";
     63       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
     64       return true;
     65     } else if (IsFst(filename)) {  // Check if Fst
     66       ifstream istrm(filename.c_str());
     67       fsthdr.Read(istrm, filename);
     68       fartype_ = "fst";
     69       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
     70       return true;
     71     }
     72     return false;
     73   }
     74 
     75  private:
     76   string fartype_;
     77   string arctype_;
     78 };
     79 
     80 enum FarType {
     81   FAR_DEFAULT = 0,
     82   FAR_STTABLE = 1,
     83   FAR_STLIST = 2,
     84   FAR_FST = 3,
     85 };
     86 
     87 // This class creates an archive of FSTs.
     88 template <class A>
     89 class FarWriter {
     90  public:
     91   typedef A Arc;
     92 
     93   // Creates a new (empty) FST archive; returns NULL on error.
     94   static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
     95 
     96   // Adds an FST to the end of an archive. Keys must be non-empty and
     97   // in lexicographic order. FSTs must have a suitable write method.
     98   virtual void Add(const string &key, const Fst<A> &fst) = 0;
     99 
    100   virtual FarType Type() const = 0;
    101 
    102   virtual bool Error() const = 0;
    103 
    104   virtual ~FarWriter() {}
    105 
    106  protected:
    107   FarWriter() {}
    108 
    109  private:
    110   DISALLOW_COPY_AND_ASSIGN(FarWriter);
    111 };
    112 
    113 
    114 // This class iterates through an existing archive of FSTs.
    115 template <class A>
    116 class FarReader {
    117  public:
    118  typedef A Arc;
    119 
    120   // Opens an existing FST archive in a single file; returns NULL on error.
    121   // Sets current position to the beginning of the achive.
    122   static FarReader *Open(const string &filename);
    123 
    124   // Opens an existing FST archive in multiple files; returns NULL on error.
    125   // Sets current position to the beginning of the achive.
    126   static FarReader *Open(const vector<string> &filenames);
    127 
    128   // Resets current posision to beginning of archive.
    129   virtual void Reset() = 0;
    130 
    131   // Sets current position to first entry >= key.  Returns true if a match.
    132   virtual bool Find(const string &key) = 0;
    133 
    134   // Current position at end of archive?
    135   virtual bool Done() const = 0;
    136 
    137   // Move current position to next FST.
    138   virtual void Next() = 0;
    139 
    140   // Returns key at the current position. This reference is invalidated if
    141   // the current position in the archive is changed.
    142   virtual const string &GetKey() const = 0;
    143 
    144   // Returns FST at the current position. This reference is invalidated if
    145   // the current position in the archive is changed.
    146   virtual const Fst<A> &GetFst() const = 0;
    147 
    148   virtual FarType Type() const = 0;
    149 
    150   virtual bool Error() const = 0;
    151 
    152   virtual ~FarReader() {}
    153 
    154  protected:
    155   FarReader() {}
    156 
    157  private:
    158   DISALLOW_COPY_AND_ASSIGN(FarReader);
    159 };
    160 
    161 
    162 template <class A>
    163 class FstWriter {
    164  public:
    165   void operator()(ostream &strm, const Fst<A> &fst) const {
    166     fst.Write(strm, FstWriteOptions());
    167   }
    168 };
    169 
    170 
    171 template <class A>
    172 class STTableFarWriter : public FarWriter<A> {
    173  public:
    174   typedef A Arc;
    175 
    176   static STTableFarWriter *Create(const string &filename) {
    177     STTableWriter<Fst<A>, FstWriter<A> > *writer =
    178         STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
    179     return new STTableFarWriter(writer);
    180   }
    181 
    182   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
    183 
    184   FarType Type() const { return FAR_STTABLE; }
    185 
    186   bool Error() const { return writer_->Error(); }
    187 
    188   ~STTableFarWriter() { delete writer_; }
    189 
    190  private:
    191   explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
    192       : writer_(writer) {}
    193 
    194  private:
    195   STTableWriter<Fst<A>, FstWriter<A> > *writer_;
    196 
    197   DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
    198 };
    199 
    200 
    201 template <class A>
    202 class STListFarWriter : public FarWriter<A> {
    203  public:
    204   typedef A Arc;
    205 
    206   static STListFarWriter *Create(const string &filename) {
    207     STListWriter<Fst<A>, FstWriter<A> > *writer =
    208         STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
    209     return new STListFarWriter(writer);
    210   }
    211 
    212   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
    213 
    214   FarType Type() const { return FAR_STLIST; }
    215 
    216   bool Error() const { return writer_->Error(); }
    217 
    218   ~STListFarWriter() { delete writer_; }
    219 
    220  private:
    221   explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
    222       : writer_(writer) {}
    223 
    224  private:
    225   STListWriter<Fst<A>, FstWriter<A> > *writer_;
    226 
    227   DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
    228 };
    229 
    230 
    231 template <class A>
    232 class FstFarWriter : public FarWriter<A> {
    233  public:
    234   typedef A Arc;
    235 
    236   explicit FstFarWriter(const string &filename)
    237       : filename_(filename), error_(false), written_(false) {}
    238 
    239   static FstFarWriter *Create(const string &filename) {
    240     return new FstFarWriter(filename);
    241   }
    242 
    243   void Add(const string &key, const Fst<A> &fst) {
    244     if (written_) {
    245       LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
    246                  << " subsequent entries discarded.";
    247     } else {
    248       error_ = !fst.Write(filename_);
    249       written_ = true;
    250     }
    251   }
    252 
    253   FarType Type() const { return FAR_FST; }
    254 
    255   bool Error() const { return error_; }
    256 
    257   ~FstFarWriter() {}
    258 
    259  private:
    260   string filename_;
    261   bool error_;
    262   bool written_;
    263 
    264   DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
    265 };
    266 
    267 
    268 template <class A>
    269 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
    270   switch(type) {
    271     case FAR_DEFAULT:
    272       if (filename.empty())
    273         return STListFarWriter<A>::Create(filename);
    274     case FAR_STTABLE:
    275       return STTableFarWriter<A>::Create(filename);
    276     case FAR_STLIST:
    277       return STListFarWriter<A>::Create(filename);
    278     case FAR_FST:
    279       return FstFarWriter<A>::Create(filename);
    280     default:
    281       LOG(ERROR) << "FarWriter::Create: unknown far type";
    282       return 0;
    283   }
    284 }
    285 
    286 
    287 template <class A>
    288 class FstReader {
    289  public:
    290   Fst<A> *operator()(istream &strm) const {
    291     return Fst<A>::Read(strm, FstReadOptions());
    292   }
    293 };
    294 
    295 
    296 template <class A>
    297 class STTableFarReader : public FarReader<A> {
    298  public:
    299   typedef A Arc;
    300 
    301   static STTableFarReader *Open(const string &filename) {
    302     STTableReader<Fst<A>, FstReader<A> > *reader =
    303         STTableReader<Fst<A>, FstReader<A> >::Open(filename);
    304     // TODO: error check
    305     return new STTableFarReader(reader);
    306   }
    307 
    308   static STTableFarReader *Open(const vector<string> &filenames) {
    309     STTableReader<Fst<A>, FstReader<A> > *reader =
    310         STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
    311     // TODO: error check
    312     return new STTableFarReader(reader);
    313   }
    314 
    315   void Reset() { reader_->Reset(); }
    316 
    317   bool Find(const string &key) { return reader_->Find(key); }
    318 
    319   bool Done() const { return reader_->Done(); }
    320 
    321   void Next() { return reader_->Next(); }
    322 
    323   const string &GetKey() const { return reader_->GetKey(); }
    324 
    325   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
    326 
    327   FarType Type() const { return FAR_STTABLE; }
    328 
    329   bool Error() const { return reader_->Error(); }
    330 
    331   ~STTableFarReader() { delete reader_; }
    332 
    333  private:
    334   explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
    335       : reader_(reader) {}
    336 
    337  private:
    338   STTableReader<Fst<A>, FstReader<A> > *reader_;
    339 
    340   DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
    341 };
    342 
    343 
    344 template <class A>
    345 class STListFarReader : public FarReader<A> {
    346  public:
    347   typedef A Arc;
    348 
    349   static STListFarReader *Open(const string &filename) {
    350     STListReader<Fst<A>, FstReader<A> > *reader =
    351         STListReader<Fst<A>, FstReader<A> >::Open(filename);
    352     // TODO: error check
    353     return new STListFarReader(reader);
    354   }
    355 
    356   static STListFarReader *Open(const vector<string> &filenames) {
    357     STListReader<Fst<A>, FstReader<A> > *reader =
    358         STListReader<Fst<A>, FstReader<A> >::Open(filenames);
    359     // TODO: error check
    360     return new STListFarReader(reader);
    361   }
    362 
    363   void Reset() { reader_->Reset(); }
    364 
    365   bool Find(const string &key) { return reader_->Find(key); }
    366 
    367   bool Done() const { return reader_->Done(); }
    368 
    369   void Next() { return reader_->Next(); }
    370 
    371   const string &GetKey() const { return reader_->GetKey(); }
    372 
    373   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
    374 
    375   FarType Type() const { return FAR_STLIST; }
    376 
    377   bool Error() const { return reader_->Error(); }
    378 
    379   ~STListFarReader() { delete reader_; }
    380 
    381  private:
    382   explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
    383       : reader_(reader) {}
    384 
    385  private:
    386   STListReader<Fst<A>, FstReader<A> > *reader_;
    387 
    388   DISALLOW_COPY_AND_ASSIGN(STListFarReader);
    389 };
    390 
    391 template <class A>
    392 class FstFarReader : public FarReader<A> {
    393  public:
    394   typedef A Arc;
    395 
    396   static FstFarReader *Open(const string &filename) {
    397     vector<string> filenames;
    398     filenames.push_back(filename);
    399     return new FstFarReader<A>(filenames);
    400   }
    401 
    402   static FstFarReader *Open(const vector<string> &filenames) {
    403     return new FstFarReader<A>(filenames);
    404   }
    405 
    406   FstFarReader(const vector<string> &filenames)
    407       : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
    408     sort(keys_.begin(), keys_.end());
    409     streams_.resize(keys_.size(), 0);
    410     for (size_t i = 0; i < keys_.size(); ++i) {
    411       if (keys_[i].empty()) {
    412         if (!has_stdin_) {
    413           streams_[i] = &cin;
    414           //sources_[i] = "stdin";
    415           has_stdin_ = true;
    416         } else {
    417           FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
    418                      << "appear once in the input file list.";
    419           error_ = true;
    420           return;
    421         }
    422       } else {
    423         streams_[i] = new ifstream(
    424             keys_[i].c_str(), ifstream::in | ifstream::binary);
    425       }
    426     }
    427     if (pos_ >= keys_.size()) return;
    428     ReadFst();
    429   }
    430 
    431   void Reset() {
    432     if (has_stdin_) {
    433       FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
    434       error_ = true;
    435       return;
    436     }
    437     pos_ = 0;
    438     ReadFst();
    439   }
    440 
    441   bool Find(const string &key) {
    442     if (has_stdin_) {
    443       FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
    444       error_ = true;
    445       return false;
    446     }
    447     pos_ = 0;//TODO
    448     ReadFst();
    449     return true;
    450   }
    451 
    452   bool Done() const { return error_ || pos_ >= keys_.size(); }
    453 
    454   void Next() {
    455     ++pos_;
    456     ReadFst();
    457   }
    458 
    459   const string &GetKey() const {
    460     return keys_[pos_];
    461   }
    462 
    463   const Fst<A> &GetFst() const {
    464     return *fst_;
    465   }
    466 
    467   FarType Type() const { return FAR_FST; }
    468 
    469   bool Error() const { return error_; }
    470 
    471   ~FstFarReader() {
    472     if (fst_) delete fst_;
    473     for (size_t i = 0; i < keys_.size(); ++i)
    474       delete streams_[i];
    475   }
    476 
    477  private:
    478   void ReadFst() {
    479     if (fst_) delete fst_;
    480     if (pos_ >= keys_.size()) return;
    481     streams_[pos_]->seekg(0);
    482     fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
    483     if (!fst_) {
    484       FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
    485       error_ = true;
    486     }
    487   }
    488 
    489  private:
    490   vector<string> keys_;
    491   vector<istream*> streams_;
    492   bool has_stdin_;
    493   size_t pos_;
    494   mutable Fst<A> *fst_;
    495   mutable bool error_;
    496 
    497   DISALLOW_COPY_AND_ASSIGN(FstFarReader);
    498 };
    499 
    500 template <class A>
    501 FarReader<A> *FarReader<A>::Open(const string &filename) {
    502   if (filename.empty())
    503     return STListFarReader<A>::Open(filename);
    504   else if (IsSTTable(filename))
    505     return STTableFarReader<A>::Open(filename);
    506   else if (IsSTList(filename))
    507     return STListFarReader<A>::Open(filename);
    508   else if (IsFst(filename))
    509     return FstFarReader<A>::Open(filename);
    510   return 0;
    511 }
    512 
    513 
    514 template <class A>
    515 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
    516   if (!filenames.empty() && filenames[0].empty())
    517     return STListFarReader<A>::Open(filenames);
    518   else if (!filenames.empty() && IsSTTable(filenames[0]))
    519     return STTableFarReader<A>::Open(filenames);
    520   else if (!filenames.empty() && IsSTList(filenames[0]))
    521     return STListFarReader<A>::Open(filenames);
    522   else if (!filenames.empty() && IsFst(filenames[0]))
    523     return FstFarReader<A>::Open(filenames);
    524   return 0;
    525 }
    526 
    527 }  // namespace fst
    528 
    529 #endif  // FST_EXTENSIONS_FAR_FAR_H__
    530