1 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 // 14 // Copyright 2005-2010 Google, Inc. 15 // Author: allauzen (at) google.com (Cyril Allauzen) 16 // 17 // \file 18 // A generic (string,type) list file format. 19 // 20 // This is a stripped-down version of STTable that does 21 // not support the Find() operation but that does support 22 // reading/writting from standard in/out. 23 24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_ 25 #define FST_EXTENSIONS_FAR_STLIST_H_ 26 27 #include <iostream> 28 #include <fstream> 29 #include <sstream> 30 #include <fst/util.h> 31 32 #include <algorithm> 33 #include <functional> 34 #include <queue> 35 #include <string> 36 #include <utility> 37 using std::pair; using std::make_pair; 38 #include <vector> 39 using std::vector; 40 41 namespace fst { 42 43 static const int32 kSTListMagicNumber = 5656924; 44 static const int32 kSTListFileVersion = 1; 45 46 // String-type list writing class for object of type 'T' using functor 'W' 47 // to write an object of type 'T' from a stream. 'W' must conform to the 48 // following interface: 49 // 50 // struct Writer { 51 // void operator()(ostream &, const T &) const; 52 // }; 53 // 54 template <class T, class W> 55 class STListWriter { 56 public: 57 typedef T EntryType; 58 typedef W EntryWriter; 59 60 explicit STListWriter(const string filename) 61 : stream_( 62 filename.empty() ? &cout : 63 new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), 64 error_(false) { 65 WriteType(*stream_, kSTListMagicNumber); 66 WriteType(*stream_, kSTListFileVersion); 67 if (!stream_) { 68 FSTERROR() << "STListWriter::STListWriter: error writing to file: " 69 << filename; 70 error_ = true; 71 } 72 } 73 74 static STListWriter<T, W> *Create(const string &filename) { 75 return new STListWriter<T, W>(filename); 76 } 77 78 void Add(const string &key, const T &t) { 79 if (key == "") { 80 FSTERROR() << "STListWriter::Add: key empty: " << key; 81 error_ = true; 82 } else if (key < last_key_) { 83 FSTERROR() << "STListWriter::Add: key disorder: " << key; 84 error_ = true; 85 } 86 if (error_) return; 87 last_key_ = key; 88 WriteType(*stream_, key); 89 entry_writer_(*stream_, t); 90 } 91 92 bool Error() const { return error_; } 93 94 ~STListWriter() { 95 WriteType(*stream_, string()); 96 if (stream_ != &cout) 97 delete stream_; 98 } 99 100 private: 101 EntryWriter entry_writer_; // Write functor for 'EntryType' 102 ostream *stream_; // Output stream 103 string last_key_; // Last key 104 bool error_; 105 106 DISALLOW_COPY_AND_ASSIGN(STListWriter); 107 }; 108 109 110 // String-type list reading class for object of type 'T' using functor 'R' 111 // to read an object of type 'T' form a stream. 'R' must conform to the 112 // following interface: 113 // 114 // struct Reader { 115 // T *operator()(istream &) const; 116 // }; 117 // 118 template <class T, class R> 119 class STListReader { 120 public: 121 typedef T EntryType; 122 typedef R EntryReader; 123 124 explicit STListReader(const vector<string> &filenames) 125 : sources_(filenames), entry_(0), error_(false) { 126 streams_.resize(filenames.size(), 0); 127 bool has_stdin = false; 128 for (size_t i = 0; i < filenames.size(); ++i) { 129 if (filenames[i].empty()) { 130 if (!has_stdin) { 131 streams_[i] = &cin; 132 sources_[i] = "stdin"; 133 has_stdin = true; 134 } else { 135 FSTERROR() << "STListReader::STListReader: stdin should only " 136 << "appear once in the input file list."; 137 error_ = true; 138 return; 139 } 140 } else { 141 streams_[i] = new ifstream( 142 filenames[i].c_str(), ifstream::in | ifstream::binary); 143 } 144 int32 magic_number = 0, file_version = 0; 145 ReadType(*streams_[i], &magic_number); 146 ReadType(*streams_[i], &file_version); 147 if (magic_number != kSTListMagicNumber) { 148 FSTERROR() << "STListReader::STListReader: wrong file type: " 149 << filenames[i]; 150 error_ = true; 151 return; 152 } 153 if (file_version != kSTListFileVersion) { 154 FSTERROR() << "STListReader::STListReader: wrong file version: " 155 << filenames[i]; 156 error_ = true; 157 return; 158 } 159 string key; 160 ReadType(*streams_[i], &key); 161 if (!key.empty()) 162 heap_.push(make_pair(key, i)); 163 if (!*streams_[i]) { 164 FSTERROR() << "STListReader: error reading file: " << sources_[i]; 165 error_ = true; 166 return; 167 } 168 } 169 if (heap_.empty()) return; 170 size_t current = heap_.top().second; 171 entry_ = entry_reader_(*streams_[current]); 172 if (!entry_ || !*streams_[current]) { 173 FSTERROR() << "STListReader: error reading entry for key: " 174 << heap_.top().first << ", file: " << sources_[current]; 175 error_ = true; 176 } 177 } 178 179 ~STListReader() { 180 for (size_t i = 0; i < streams_.size(); ++i) { 181 if (streams_[i] != &cin) 182 delete streams_[i]; 183 } 184 if (entry_) 185 delete entry_; 186 } 187 188 static STListReader<T, R> *Open(const string &filename) { 189 vector<string> filenames; 190 filenames.push_back(filename); 191 return new STListReader<T, R>(filenames); 192 } 193 194 static STListReader<T, R> *Open(const vector<string> &filenames) { 195 return new STListReader<T, R>(filenames); 196 } 197 198 void Reset() { 199 FSTERROR() 200 << "STListReader::Reset: stlist does not support reset operation"; 201 error_ = true; 202 } 203 204 bool Find(const string &key) { 205 FSTERROR() 206 << "STListReader::Find: stlist does not support find operation"; 207 error_ = true; 208 return false; 209 } 210 211 bool Done() const { 212 return error_ || heap_.empty(); 213 } 214 215 void Next() { 216 if (error_) return; 217 size_t current = heap_.top().second; 218 string key; 219 heap_.pop(); 220 ReadType(*(streams_[current]), &key); 221 if (!*streams_[current]) { 222 FSTERROR() << "STListReader: error reading file: " 223 << sources_[current]; 224 error_ = true; 225 return; 226 } 227 if (!key.empty()) 228 heap_.push(make_pair(key, current)); 229 230 if(!heap_.empty()) { 231 current = heap_.top().second; 232 if (entry_) 233 delete entry_; 234 entry_ = entry_reader_(*streams_[current]); 235 if (!entry_ || !*streams_[current]) { 236 FSTERROR() << "STListReader: error reading entry for key: " 237 << heap_.top().first << ", file: " << sources_[current]; 238 error_ = true; 239 } 240 } 241 } 242 243 const string &GetKey() const { 244 return heap_.top().first; 245 } 246 247 const EntryType &GetEntry() const { 248 return *entry_; 249 } 250 251 bool Error() const { return error_; } 252 253 private: 254 EntryReader entry_reader_; // Read functor for 'EntryType' 255 vector<istream*> streams_; // Input streams 256 vector<string> sources_; // and corresponding file names 257 priority_queue< 258 pair<string, size_t>, vector<pair<string, size_t> >, 259 greater<pair<string, size_t> > > heap_; // (Key, stream id) heap 260 mutable EntryType *entry_; // Pointer to the currently read entry 261 bool error_; 262 263 DISALLOW_COPY_AND_ASSIGN(STListReader); 264 }; 265 266 267 // String-type list header reading function template on the entry header 268 // type 'H' having a member function: 269 // Read(istream &strm, const string &filename); 270 // Checks that 'filename' is an STList and call the H::Read() on the last 271 // entry in the STList. 272 // Does not support reading from stdin. 273 template <class H> 274 bool ReadSTListHeader(const string &filename, H *header) { 275 if (filename.empty()) { 276 LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; 277 return false; 278 } 279 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); 280 int32 magic_number = 0, file_version = 0; 281 ReadType(strm, &magic_number); 282 ReadType(strm, &file_version); 283 if (magic_number != kSTListMagicNumber) { 284 LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename; 285 return false; 286 } 287 if (file_version != kSTListFileVersion) { 288 LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename; 289 return false; 290 } 291 string key; 292 ReadType(strm, &key); 293 header->Read(strm, filename + ":" + key); 294 if (!strm) { 295 LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename; 296 return false; 297 } 298 return true; 299 } 300 301 bool IsSTList(const string &filename); 302 303 } // namespace fst 304 305 #endif // FST_EXTENSIONS_FAR_STLIST_H_ 306