1 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 // 14 // Copyright 2005-2010 Google, Inc. 15 // Author: allauzen (at) google.com (Cyril Allauzen) 16 // 17 // \file 18 // A generic (string,type) list file format. 19 // 20 // This is a stripped-down version of STTable that does 21 // not support the Find() operation but that does support 22 // reading/writting from standard in/out. 23 24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_ 25 #define FST_EXTENSIONS_FAR_STLIST_H_ 26 27 #include <iostream> 28 #include <fstream> 29 #include <fst/util.h> 30 31 #include <algorithm> 32 #include <functional> 33 #include <queue> 34 #include <string> 35 #include <utility> 36 using std::pair; using std::make_pair; 37 #include <vector> 38 using std::vector; 39 40 namespace fst { 41 42 static const int32 kSTListMagicNumber = 5656924; 43 static const int32 kSTListFileVersion = 1; 44 45 // String-type list writing class for object of type 'T' using functor 'W' 46 // to write an object of type 'T' from a stream. 'W' must conform to the 47 // following interface: 48 // 49 // struct Writer { 50 // void operator()(ostream &, const T &) const; 51 // }; 52 // 53 template <class T, class W> 54 class STListWriter { 55 public: 56 typedef T EntryType; 57 typedef W EntryWriter; 58 59 explicit STListWriter(const string filename) 60 : stream_( 61 filename.empty() ? &std::cout : 62 new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), 63 error_(false) { 64 WriteType(*stream_, kSTListMagicNumber); 65 WriteType(*stream_, kSTListFileVersion); 66 if (!stream_) { 67 FSTERROR() << "STListWriter::STListWriter: error writing to file: " 68 << filename; 69 error_ = true; 70 } 71 } 72 73 static STListWriter<T, W> *Create(const string &filename) { 74 return new STListWriter<T, W>(filename); 75 } 76 77 void Add(const string &key, const T &t) { 78 if (key == "") { 79 FSTERROR() << "STListWriter::Add: key empty: " << key; 80 error_ = true; 81 } else if (key < last_key_) { 82 FSTERROR() << "STListWriter::Add: key disorder: " << key; 83 error_ = true; 84 } 85 if (error_) return; 86 last_key_ = key; 87 WriteType(*stream_, key); 88 entry_writer_(*stream_, t); 89 } 90 91 bool Error() const { return error_; } 92 93 ~STListWriter() { 94 WriteType(*stream_, string()); 95 if (stream_ != &std::cout) 96 delete stream_; 97 } 98 99 private: 100 EntryWriter entry_writer_; // Write functor for 'EntryType' 101 ostream *stream_; // Output stream 102 string last_key_; // Last key 103 bool error_; 104 105 DISALLOW_COPY_AND_ASSIGN(STListWriter); 106 }; 107 108 109 // String-type list reading class for object of type 'T' using functor 'R' 110 // to read an object of type 'T' form a stream. 'R' must conform to the 111 // following interface: 112 // 113 // struct Reader { 114 // T *operator()(istream &) const; 115 // }; 116 // 117 template <class T, class R> 118 class STListReader { 119 public: 120 typedef T EntryType; 121 typedef R EntryReader; 122 123 explicit STListReader(const vector<string> &filenames) 124 : sources_(filenames), entry_(0), error_(false) { 125 streams_.resize(filenames.size(), 0); 126 bool has_stdin = false; 127 for (size_t i = 0; i < filenames.size(); ++i) { 128 if (filenames[i].empty()) { 129 if (!has_stdin) { 130 streams_[i] = &std::cin; 131 sources_[i] = "stdin"; 132 has_stdin = true; 133 } else { 134 FSTERROR() << "STListReader::STListReader: stdin should only " 135 << "appear once in the input file list."; 136 error_ = true; 137 return; 138 } 139 } else { 140 streams_[i] = new ifstream( 141 filenames[i].c_str(), ifstream::in | ifstream::binary); 142 } 143 int32 magic_number = 0, file_version = 0; 144 ReadType(*streams_[i], &magic_number); 145 ReadType(*streams_[i], &file_version); 146 if (magic_number != kSTListMagicNumber) { 147 FSTERROR() << "STListReader::STTableReader: wrong file type: " 148 << filenames[i]; 149 error_ = true; 150 return; 151 } 152 if (file_version != kSTListFileVersion) { 153 FSTERROR() << "STListReader::STTableReader: wrong file version: " 154 << filenames[i]; 155 error_ = true; 156 return; 157 } 158 string key; 159 ReadType(*streams_[i], &key); 160 if (!key.empty()) 161 heap_.push(make_pair(key, i)); 162 if (!*streams_[i]) { 163 FSTERROR() << "STTableReader: error reading file: " << sources_[i]; 164 error_ = true; 165 return; 166 } 167 } 168 if (heap_.empty()) return; 169 size_t current = heap_.top().second; 170 entry_ = entry_reader_(*streams_[current]); 171 if (!entry_ || !*streams_[current]) { 172 FSTERROR() << "STTableReader: error reading entry for key: " 173 << heap_.top().first << ", file: " << sources_[current]; 174 error_ = true; 175 } 176 } 177 178 ~STListReader() { 179 for (size_t i = 0; i < streams_.size(); ++i) { 180 if (streams_[i] != &std::cin) 181 delete streams_[i]; 182 } 183 if (entry_) 184 delete entry_; 185 } 186 187 static STListReader<T, R> *Open(const string &filename) { 188 vector<string> filenames; 189 filenames.push_back(filename); 190 return new STListReader<T, R>(filenames); 191 } 192 193 static STListReader<T, R> *Open(const vector<string> &filenames) { 194 return new STListReader<T, R>(filenames); 195 } 196 197 void Reset() { 198 FSTERROR() 199 << "STListReader::Reset: stlist does not support reset operation"; 200 error_ = true; 201 } 202 203 bool Find(const string &key) { 204 FSTERROR() 205 << "STListReader::Find: stlist does not support find operation"; 206 error_ = true; 207 return false; 208 } 209 210 bool Done() const { 211 return error_ || heap_.empty(); 212 } 213 214 void Next() { 215 if (error_) return; 216 size_t current = heap_.top().second; 217 string key; 218 heap_.pop(); 219 ReadType(*(streams_[current]), &key); 220 if (!*streams_[current]) { 221 FSTERROR() << "STTableReader: error reading file: " 222 << sources_[current]; 223 error_ = true; 224 return; 225 } 226 if (!key.empty()) 227 heap_.push(make_pair(key, current)); 228 229 if(!heap_.empty()) { 230 current = heap_.top().second; 231 if (entry_) 232 delete entry_; 233 entry_ = entry_reader_(*streams_[current]); 234 if (!entry_ || !*streams_[current]) { 235 FSTERROR() << "STTableReader: error reading entry for key: " 236 << heap_.top().first << ", file: " << sources_[current]; 237 error_ = true; 238 } 239 } 240 } 241 242 const string &GetKey() const { 243 return heap_.top().first; 244 } 245 246 const EntryType &GetEntry() const { 247 return *entry_; 248 } 249 250 bool Error() const { return error_; } 251 252 private: 253 EntryReader entry_reader_; // Read functor for 'EntryType' 254 vector<istream*> streams_; // Input streams 255 vector<string> sources_; // and corresponding file names 256 priority_queue< 257 pair<string, size_t>, vector<pair<string, size_t> >, 258 greater<pair<string, size_t> > > heap_; // (Key, stream id) heap 259 mutable EntryType *entry_; // Pointer to the currently read entry 260 bool error_; 261 262 DISALLOW_COPY_AND_ASSIGN(STListReader); 263 }; 264 265 266 // String-type list header reading function template on the entry header 267 // type 'H' having a member function: 268 // Read(istream &strm, const string &filename); 269 // Checks that 'filename' is an STTable and call the H::Read() on the last 270 // entry in the STTable. 271 // Does not support reading from stdin. 272 template <class H> 273 bool ReadSTListHeader(const string &filename, H *header) { 274 if (filename.empty()) { 275 LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; 276 return false; 277 } 278 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); 279 int32 magic_number = 0, file_version = 0; 280 ReadType(strm, &magic_number); 281 ReadType(strm, &file_version); 282 if (magic_number != kSTListMagicNumber) { 283 LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename; 284 return false; 285 } 286 if (file_version != kSTListFileVersion) { 287 LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename; 288 return false; 289 } 290 string key; 291 ReadType(strm, &key); 292 header->Read(strm, filename + ":" + key); 293 if (!strm) { 294 LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; 295 return false; 296 } 297 return true; 298 } 299 300 bool IsSTList(const string &filename); 301 302 } // namespace fst 303 304 #endif // FST_EXTENSIONS_FAR_STLIST_H_ 305