Home | History | Annotate | Download | only in far
      1 
      2 // Licensed under the Apache License, Version 2.0 (the "License");
      3 // you may not use this file except in compliance with the License.
      4 // You may obtain a copy of the License at
      5 //
      6 //     http://www.apache.org/licenses/LICENSE-2.0
      7 //
      8 // Unless required by applicable law or agreed to in writing, software
      9 // distributed under the License is distributed on an "AS IS" BASIS,
     10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 // See the License for the specific language governing permissions and
     12 // limitations under the License.
     13 //
     14 // Copyright 2005-2010 Google, Inc.
     15 // Author: allauzen (at) google.com (Cyril Allauzen)
     16 //
     17 // \file
     18 // A generic (string,type) list file format.
     19 //
     20 // This is a stripped-down version of STTable that does
     21 // not support the Find() operation but that does support
     22 // reading/writing from standard in/out.
     23 
     24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
     25 #define FST_EXTENSIONS_FAR_STLIST_H_
     26 
     27 #include <iostream>
     28 #include <fstream>
     29 #include <sstream>
     30 #include <fst/util.h>
     31 
     32 #include <algorithm>
     33 #include <functional>
     34 #include <queue>
     35 #include <string>
     36 #include <utility>
     37 using std::pair; using std::make_pair;
     38 #include <vector>
     39 using std::vector;
     40 
     41 namespace fst {
     42 
     43 static const int32 kSTListMagicNumber = 5656924;  // First value in an STList stream; readers reject any other value.
     44 static const int32 kSTListFileVersion = 1;  // Written right after the magic number; readers reject any other value.
     45 
     46 // String-type list writing class for object of type 'T' using functor 'W'
     47 // to write an object of type 'T' to a stream. 'W' must conform to the
     48 // following interface:
     49 //
     50 //   struct Writer {
     51 //     void operator()(ostream &, const T &) const;
     52 //   };
     53 //
     54 template <class T, class W>
     55 class STListWriter {
     56  public:
     57   typedef T EntryType;
     58   typedef W EntryWriter;
     59 
     60   explicit STListWriter(const string filename)
     61       : stream_(
     62           filename.empty() ? &cout :
     63           new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
     64         error_(false) {
     65     WriteType(*stream_, kSTListMagicNumber);
     66     WriteType(*stream_, kSTListFileVersion);
     67     if (!stream_) {
     68       FSTERROR() << "STListWriter::STListWriter: error writing to file: "
     69                  << filename;
     70       error_ = true;
     71     }
     72   }
     73 
     74   static STListWriter<T, W> *Create(const string &filename) {
     75     return new STListWriter<T, W>(filename);
     76   }
     77 
     78   void Add(const string &key, const T &t) {
     79     if (key == "") {
     80       FSTERROR() << "STListWriter::Add: key empty: " << key;
     81       error_ = true;
     82     } else if (key < last_key_) {
     83       FSTERROR() << "STListWriter::Add: key disorder: " << key;
     84       error_ = true;
     85     }
     86     if (error_) return;
     87     last_key_ = key;
     88     WriteType(*stream_, key);
     89     entry_writer_(*stream_, t);
     90   }
     91 
     92   bool Error() const { return error_; }
     93 
     94   ~STListWriter() {
     95     WriteType(*stream_, string());
     96     if (stream_ != &cout)
     97       delete stream_;
     98   }
     99 
    100  private:
    101   EntryWriter entry_writer_;  // Write functor for 'EntryType'
    102   ostream *stream_;           // Output stream
    103   string last_key_;           // Last key
    104   bool error_;
    105 
    106   DISALLOW_COPY_AND_ASSIGN(STListWriter);
    107 };
    108 
    109 
    110 // String-type list reading class for object of type 'T' using functor 'R'
    111 // to read an object of type 'T' from a stream. 'R' must conform to the
    112 // following interface:
    113 //
    114 //   struct Reader {
    115 //     T *operator()(istream &) const;
    116 //   };
    117 //
    118 template <class T, class R>
    119 class STListReader {
    120  public:
    121   typedef T EntryType;
    122   typedef R EntryReader;
    123 
    124   explicit STListReader(const vector<string> &filenames)
    125       : sources_(filenames), entry_(0), error_(false) {
    126     streams_.resize(filenames.size(), 0);
    127     bool has_stdin = false;
    128     for (size_t i = 0; i < filenames.size(); ++i) {
    129       if (filenames[i].empty()) {
    130         if (!has_stdin) {
    131           streams_[i] = &cin;
    132           sources_[i] = "stdin";
    133           has_stdin = true;
    134         } else {
    135           FSTERROR() << "STListReader::STListReader: stdin should only "
    136                      << "appear once in the input file list.";
    137           error_ = true;
    138           return;
    139         }
    140       } else {
    141         streams_[i] = new ifstream(
    142             filenames[i].c_str(), ifstream::in | ifstream::binary);
    143       }
    144       int32 magic_number = 0, file_version = 0;
    145       ReadType(*streams_[i], &magic_number);
    146       ReadType(*streams_[i], &file_version);
    147       if (magic_number != kSTListMagicNumber) {
    148         FSTERROR() << "STListReader::STListReader: wrong file type: "
    149                    << filenames[i];
    150         error_ = true;
    151         return;
    152       }
    153       if (file_version != kSTListFileVersion) {
    154         FSTERROR() << "STListReader::STListReader: wrong file version: "
    155                    << filenames[i];
    156         error_ = true;
    157         return;
    158       }
    159       string key;
    160       ReadType(*streams_[i], &key);
    161       if (!key.empty())
    162         heap_.push(make_pair(key, i));
    163       if (!*streams_[i]) {
    164         FSTERROR() << "STListReader: error reading file: " << sources_[i];
    165         error_ = true;
    166         return;
    167       }
    168     }
    169     if (heap_.empty()) return;
    170     size_t current = heap_.top().second;
    171     entry_ = entry_reader_(*streams_[current]);
    172     if (!entry_ || !*streams_[current]) {
    173       FSTERROR() << "STListReader: error reading entry for key: "
    174                  << heap_.top().first << ", file: " << sources_[current];
    175       error_ = true;
    176     }
    177   }
    178 
    179   ~STListReader() {
    180     for (size_t i = 0; i < streams_.size(); ++i) {
    181       if (streams_[i] != &cin)
    182         delete streams_[i];
    183     }
    184     if (entry_)
    185       delete entry_;
    186   }
    187 
    188   static STListReader<T, R> *Open(const string &filename) {
    189     vector<string> filenames;
    190     filenames.push_back(filename);
    191     return new STListReader<T, R>(filenames);
    192   }
    193 
    194   static STListReader<T, R> *Open(const vector<string> &filenames) {
    195     return new STListReader<T, R>(filenames);
    196   }
    197 
    198   void Reset() {
    199     FSTERROR()
    200         << "STListReader::Reset: stlist does not support reset operation";
    201     error_ = true;
    202   }
    203 
    204   bool Find(const string &key) {
    205     FSTERROR()
    206         << "STListReader::Find: stlist does not support find operation";
    207     error_ = true;
    208     return false;
    209   }
    210 
    211   bool Done() const {
    212     return error_ || heap_.empty();
    213   }
    214 
    215   void Next() {
    216     if (error_) return;
    217     size_t current = heap_.top().second;
    218     string key;
    219     heap_.pop();
    220     ReadType(*(streams_[current]), &key);
    221     if (!*streams_[current]) {
    222       FSTERROR() << "STListReader: error reading file: "
    223                  << sources_[current];
    224       error_ = true;
    225       return;
    226     }
    227     if (!key.empty())
    228       heap_.push(make_pair(key, current));
    229 
    230     if(!heap_.empty()) {
    231       current = heap_.top().second;
    232       if (entry_)
    233         delete entry_;
    234       entry_ = entry_reader_(*streams_[current]);
    235       if (!entry_ || !*streams_[current]) {
    236         FSTERROR() << "STListReader: error reading entry for key: "
    237                    << heap_.top().first << ", file: " << sources_[current];
    238         error_ = true;
    239       }
    240     }
    241   }
    242 
    243   const string &GetKey() const {
    244     return heap_.top().first;
    245   }
    246 
    247   const EntryType &GetEntry() const {
    248     return *entry_;
    249   }
    250 
    251   bool Error() const { return error_; }
    252 
    253  private:
    254   EntryReader entry_reader_;   // Read functor for 'EntryType'
    255   vector<istream*> streams_;   // Input streams
    256   vector<string> sources_;     // and corresponding file names
    257   priority_queue<
    258     pair<string, size_t>, vector<pair<string, size_t> >,
    259     greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
    260   mutable EntryType *entry_;   // Pointer to the currently read entry
    261   bool error_;
    262 
    263   DISALLOW_COPY_AND_ASSIGN(STListReader);
    264 };
    265 
    266 
    267 // String-type list header reading function template on the entry header
    268 // type 'H' having a member function:
    269 //   Read(istream &strm, const string &filename);
    270 // Checks that 'filename' is an STList and calls H::Read() on the first
    271 // entry in the STList.
    272 // Does not support reading from stdin.
    273 template <class H>
    274 bool ReadSTListHeader(const string &filename, H *header) {
    275   if (filename.empty()) {
    276     LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
    277     return false;
    278   }
    279   ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
    280   int32 magic_number = 0, file_version = 0;
    281   ReadType(strm, &magic_number);
    282   ReadType(strm, &file_version);
    283   if (magic_number != kSTListMagicNumber) {
    284     LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename;
    285     return false;
    286   }
    287   if (file_version != kSTListFileVersion) {
    288     LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename;
    289     return false;
    290   }
    291   string key;
    292   ReadType(strm, &key);
    293   header->Read(strm, filename + ":" + key);
    294   if (!strm) {
    295     LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename;
    296     return false;
    297   }
    298   return true;
    299 }
    300 
    301 bool IsSTList(const string &filename);  // Declaration only; the definition is not part of this header.
    302 
    303 }  // namespace fst
    304 
    305 #endif  // FST_EXTENSIONS_FAR_STLIST_H_
    306