Home | History | Annotate | Download | only in far
      1 
      2 // Licensed under the Apache License, Version 2.0 (the "License");
      3 // you may not use this file except in compliance with the License.
      4 // You may obtain a copy of the License at
      5 //
      6 //     http://www.apache.org/licenses/LICENSE-2.0
      7 //
      8 // Unless required by applicable law or agreed to in writing, software
      9 // distributed under the License is distributed on an "AS IS" BASIS,
     10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 // See the License for the specific language governing permissions and
     12 // limitations under the License.
     13 //
     14 // Copyright 2005-2010 Google, Inc.
     15 // Author: allauzen (at) google.com (Cyril Allauzen)
     16 //
     17 // \file
     18 // A generic (string,type) list file format.
     19 //
     20 // This is a stripped-down version of STTable that does
     21 // not support the Find() operation but that does support
     22 // reading/writing from standard in/out.
     23 
     24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
     25 #define FST_EXTENSIONS_FAR_STLIST_H_
     26 
     27 #include <iostream>
     28 #include <fstream>
     29 #include <fst/util.h>
     30 
     31 #include <algorithm>
     32 #include <functional>
     33 #include <queue>
     34 #include <string>
     35 #include <utility>
     36 using std::pair; using std::make_pair;
     37 #include <vector>
     38 using std::vector;
     39 
     40 namespace fst {
     41 
     42 static const int32 kSTListMagicNumber = 5656924;  // Leading int32 of every STList stream.
     43 static const int32 kSTListFileVersion = 1;  // Follows the magic number; the only version readers here accept.
     44 
     45 // String-type list writing class for object of type 'T' using functor 'W'
     46 // to write an object of type 'T' to a stream. 'W' must conform to the
     47 // following interface:
     48 //
     49 //   struct Writer {
     50 //     void operator()(ostream &, const T &) const;
     51 //   };
     52 //
     53 template <class T, class W>
     54 class STListWriter {
     55  public:
     56   typedef T EntryType;
     57   typedef W EntryWriter;
     58 
     59   explicit STListWriter(const string filename)
     60       : stream_(
     61           filename.empty() ? &std::cout :
     62           new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
     63         error_(false) {
     64     WriteType(*stream_, kSTListMagicNumber);
     65     WriteType(*stream_, kSTListFileVersion);
     66     if (!stream_) {
     67       FSTERROR() << "STListWriter::STListWriter: error writing to file: "
     68                  << filename;
     69       error_ = true;
     70     }
     71   }
     72 
     73   static STListWriter<T, W> *Create(const string &filename) {
     74     return new STListWriter<T, W>(filename);
     75   }
     76 
     77   void Add(const string &key, const T &t) {
     78     if (key == "") {
     79       FSTERROR() << "STListWriter::Add: key empty: " << key;
     80       error_ = true;
     81     } else if (key < last_key_) {
     82       FSTERROR() << "STListWriter::Add: key disorder: " << key;
     83       error_ = true;
     84     }
     85     if (error_) return;
     86     last_key_ = key;
     87     WriteType(*stream_, key);
     88     entry_writer_(*stream_, t);
     89   }
     90 
     91   bool Error() const { return error_; }
     92 
     93   ~STListWriter() {
     94     WriteType(*stream_, string());
     95     if (stream_ != &std::cout)
     96       delete stream_;
     97   }
     98 
     99  private:
    100   EntryWriter entry_writer_;  // Write functor for 'EntryType'
    101   ostream *stream_;           // Output stream
    102   string last_key_;           // Last key
    103   bool error_;
    104 
    105   DISALLOW_COPY_AND_ASSIGN(STListWriter);
    106 };
    107 
    108 
    109 // String-type list reading class for object of type 'T' using functor 'R'
    110 // to read an object of type 'T' from a stream. 'R' must conform to the
    111 // following interface:
    112 //
    113 //   struct Reader {
    114 //     T *operator()(istream &) const;
    115 //   };
    116 //
    117 template <class T, class R>
    118 class STListReader {
    119  public:
    120   typedef T EntryType;
    121   typedef R EntryReader;
    122 
    123   explicit STListReader(const vector<string> &filenames)
    124       : sources_(filenames), entry_(0), error_(false) {
    125     streams_.resize(filenames.size(), 0);
    126     bool has_stdin = false;
    127     for (size_t i = 0; i < filenames.size(); ++i) {
    128       if (filenames[i].empty()) {
    129         if (!has_stdin) {
    130           streams_[i] = &std::cin;
    131           sources_[i] = "stdin";
    132           has_stdin = true;
    133         } else {
    134           FSTERROR() << "STListReader::STListReader: stdin should only "
    135                      << "appear once in the input file list.";
    136           error_ = true;
    137           return;
    138         }
    139       } else {
    140         streams_[i] = new ifstream(
    141             filenames[i].c_str(), ifstream::in | ifstream::binary);
    142       }
    143       int32 magic_number = 0, file_version = 0;
    144       ReadType(*streams_[i], &magic_number);
    145       ReadType(*streams_[i], &file_version);
    146       if (magic_number != kSTListMagicNumber) {
    147         FSTERROR() << "STListReader::STTableReader: wrong file type: "
    148                    << filenames[i];
    149         error_ = true;
    150         return;
    151       }
    152       if (file_version != kSTListFileVersion) {
    153         FSTERROR() << "STListReader::STTableReader: wrong file version: "
    154                    << filenames[i];
    155         error_ = true;
    156         return;
    157       }
    158       string key;
    159       ReadType(*streams_[i], &key);
    160       if (!key.empty())
    161         heap_.push(make_pair(key, i));
    162       if (!*streams_[i]) {
    163         FSTERROR() << "STTableReader: error reading file: " << sources_[i];
    164         error_ = true;
    165         return;
    166       }
    167     }
    168     if (heap_.empty()) return;
    169     size_t current = heap_.top().second;
    170     entry_ = entry_reader_(*streams_[current]);
    171     if (!entry_ || !*streams_[current]) {
    172       FSTERROR() << "STTableReader: error reading entry for key: "
    173                  << heap_.top().first << ", file: " << sources_[current];
    174       error_ = true;
    175     }
    176   }
    177 
    178   ~STListReader() {
    179     for (size_t i = 0; i < streams_.size(); ++i) {
    180       if (streams_[i] != &std::cin)
    181         delete streams_[i];
    182     }
    183     if (entry_)
    184       delete entry_;
    185   }
    186 
    187   static STListReader<T, R> *Open(const string &filename) {
    188     vector<string> filenames;
    189     filenames.push_back(filename);
    190     return new STListReader<T, R>(filenames);
    191   }
    192 
    193   static STListReader<T, R> *Open(const vector<string> &filenames) {
    194     return new STListReader<T, R>(filenames);
    195   }
    196 
    197   void Reset() {
    198     FSTERROR()
    199         << "STListReader::Reset: stlist does not support reset operation";
    200     error_ = true;
    201   }
    202 
    203   bool Find(const string &key) {
    204     FSTERROR()
    205         << "STListReader::Find: stlist does not support find operation";
    206     error_ = true;
    207     return false;
    208   }
    209 
    210   bool Done() const {
    211     return error_ || heap_.empty();
    212   }
    213 
    214   void Next() {
    215     if (error_) return;
    216     size_t current = heap_.top().second;
    217     string key;
    218     heap_.pop();
    219     ReadType(*(streams_[current]), &key);
    220     if (!*streams_[current]) {
    221       FSTERROR() << "STTableReader: error reading file: "
    222                  << sources_[current];
    223       error_ = true;
    224       return;
    225     }
    226     if (!key.empty())
    227       heap_.push(make_pair(key, current));
    228 
    229     if(!heap_.empty()) {
    230       current = heap_.top().second;
    231       if (entry_)
    232         delete entry_;
    233       entry_ = entry_reader_(*streams_[current]);
    234       if (!entry_ || !*streams_[current]) {
    235         FSTERROR() << "STTableReader: error reading entry for key: "
    236                    << heap_.top().first << ", file: " << sources_[current];
    237         error_ = true;
    238       }
    239     }
    240   }
    241 
    242   const string &GetKey() const {
    243     return heap_.top().first;
    244   }
    245 
    246   const EntryType &GetEntry() const {
    247     return *entry_;
    248   }
    249 
    250   bool Error() const { return error_; }
    251 
    252  private:
    253   EntryReader entry_reader_;   // Read functor for 'EntryType'
    254   vector<istream*> streams_;   // Input streams
    255   vector<string> sources_;     // and corresponding file names
    256   priority_queue<
    257     pair<string, size_t>, vector<pair<string, size_t> >,
    258     greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
    259   mutable EntryType *entry_;   // Pointer to the currently read entry
    260   bool error_;
    261 
    262   DISALLOW_COPY_AND_ASSIGN(STListReader);
    263 };
    264 
    265 
    266 // String-type list header reading function template on the entry header
    267 // type 'H' having a member function:
    268 //   Read(istream &strm, const string &filename);
    269 // Checks that 'filename' is an STList and calls H::Read() on the first
    270 // entry in the STList.
    271 // Does not support reading from stdin.
    272 template <class H>
    273 bool ReadSTListHeader(const string &filename, H *header) {
    274   if (filename.empty()) {
    275     LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
    276     return false;
    277   }
    278   ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
    279   int32 magic_number = 0, file_version = 0;
    280   ReadType(strm, &magic_number);
    281   ReadType(strm, &file_version);
    282   if (magic_number != kSTListMagicNumber) {
    283     LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename;
    284     return false;
    285   }
    286   if (file_version != kSTListFileVersion) {
    287     LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename;
    288     return false;
    289   }
    290   string key;
    291   ReadType(strm, &key);
    292   header->Read(strm, filename + ":" + key);
    293   if (!strm) {
    294     LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
    295     return false;
    296   }
    297   return true;
    298 }
    299 
    300 bool IsSTList(const string &filename);  // Defined outside this header. NOTE(review): presumably tests whether 'filename' is an STList file - confirm.
    301 
    302 }  // namespace fst
    303 
    304 #endif  // FST_EXTENSIONS_FAR_STLIST_H_
    305