1 // far.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Finite-State Transducer (FST) archive classes. 20 // 21 22 #ifndef FST_EXTENSIONS_FAR_FAR_H__ 23 #define FST_EXTENSIONS_FAR_FAR_H__ 24 25 #include <fst/extensions/far/stlist.h> 26 #include <fst/extensions/far/sttable.h> 27 #include <fst/fst.h> 28 #include <fst/vector-fst.h> 29 30 namespace fst { 31 32 enum FarEntryType { FET_LINE, FET_FILE }; 33 enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; 34 35 inline bool IsFst(const string &filename) { 36 ifstream strm(filename.c_str()); 37 if (!strm) 38 return false; 39 return IsFstHeader(strm, filename); 40 } 41 42 // FST archive header class 43 class FarHeader { 44 public: 45 const string &FarType() const { return fartype_; } 46 const string &ArcType() const { return arctype_; } 47 48 bool Read(const string &filename) { 49 FstHeader fsthdr; 50 if (filename.empty()) { 51 // Header reading unsupported on stdin. Assumes STList and StdArc. 52 fartype_ = "stlist"; 53 arctype_ = "standard"; 54 return true; 55 } else if (IsSTTable(filename)) { // Check if STTable 56 ReadSTTableHeader(filename, &fsthdr); 57 fartype_ = "sttable"; 58 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); 59 return true; 60 } else if (IsSTList(filename)) { // Check if STList 61 ReadSTListHeader(filename, &fsthdr); 62 fartype_ = "sttable"; 63 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); 64 return true; 65 } else if (IsFst(filename)) { // Check if Fst 66 ifstream istrm(filename.c_str()); 67 fsthdr.Read(istrm, filename); 68 fartype_ = "fst"; 69 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); 70 return true; 71 } 72 return false; 73 } 74 75 private: 76 string fartype_; 77 string arctype_; 78 }; 79 80 enum FarType { 81 FAR_DEFAULT = 0, 82 FAR_STTABLE = 1, 83 FAR_STLIST = 2, 84 FAR_FST = 3, 85 }; 86 87 // This class creates an archive of FSTs. 88 template <class A> 89 class FarWriter { 90 public: 91 typedef A Arc; 92 93 // Creates a new (empty) FST archive; returns NULL on error. 94 static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); 95 96 // Adds an FST to the end of an archive. Keys must be non-empty and 97 // in lexicographic order. FSTs must have a suitable write method. 98 virtual void Add(const string &key, const Fst<A> &fst) = 0; 99 100 virtual FarType Type() const = 0; 101 102 virtual bool Error() const = 0; 103 104 virtual ~FarWriter() {} 105 106 protected: 107 FarWriter() {} 108 109 private: 110 DISALLOW_COPY_AND_ASSIGN(FarWriter); 111 }; 112 113 114 // This class iterates through an existing archive of FSTs. 115 template <class A> 116 class FarReader { 117 public: 118 typedef A Arc; 119 120 // Opens an existing FST archive in a single file; returns NULL on error. 121 // Sets current position to the beginning of the achive. 122 static FarReader *Open(const string &filename); 123 124 // Opens an existing FST archive in multiple files; returns NULL on error. 125 // Sets current position to the beginning of the achive. 126 static FarReader *Open(const vector<string> &filenames); 127 128 // Resets current posision to beginning of archive. 129 virtual void Reset() = 0; 130 131 // Sets current position to first entry >= key. Returns true if a match. 132 virtual bool Find(const string &key) = 0; 133 134 // Current position at end of archive? 135 virtual bool Done() const = 0; 136 137 // Move current position to next FST. 138 virtual void Next() = 0; 139 140 // Returns key at the current position. This reference is invalidated if 141 // the current position in the archive is changed. 142 virtual const string &GetKey() const = 0; 143 144 // Returns FST at the current position. This reference is invalidated if 145 // the current position in the archive is changed. 146 virtual const Fst<A> &GetFst() const = 0; 147 148 virtual FarType Type() const = 0; 149 150 virtual bool Error() const = 0; 151 152 virtual ~FarReader() {} 153 154 protected: 155 FarReader() {} 156 157 private: 158 DISALLOW_COPY_AND_ASSIGN(FarReader); 159 }; 160 161 162 template <class A> 163 class FstWriter { 164 public: 165 void operator()(ostream &strm, const Fst<A> &fst) const { 166 fst.Write(strm, FstWriteOptions()); 167 } 168 }; 169 170 171 template <class A> 172 class STTableFarWriter : public FarWriter<A> { 173 public: 174 typedef A Arc; 175 176 static STTableFarWriter *Create(const string &filename) { 177 STTableWriter<Fst<A>, FstWriter<A> > *writer = 178 STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); 179 return new STTableFarWriter(writer); 180 } 181 182 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } 183 184 FarType Type() const { return FAR_STTABLE; } 185 186 bool Error() const { return writer_->Error(); } 187 188 ~STTableFarWriter() { delete writer_; } 189 190 private: 191 explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer) 192 : writer_(writer) {} 193 194 private: 195 STTableWriter<Fst<A>, FstWriter<A> > *writer_; 196 197 DISALLOW_COPY_AND_ASSIGN(STTableFarWriter); 198 }; 199 200 201 template <class A> 202 class STListFarWriter : public FarWriter<A> { 203 public: 204 typedef A Arc; 205 206 static STListFarWriter *Create(const string &filename) { 207 STListWriter<Fst<A>, FstWriter<A> > *writer = 208 STListWriter<Fst<A>, FstWriter<A> >::Create(filename); 209 return new STListFarWriter(writer); 210 } 211 212 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } 213 214 FarType Type() const { return FAR_STLIST; } 215 216 bool Error() const { return writer_->Error(); } 217 218 ~STListFarWriter() { delete writer_; } 219 220 private: 221 explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer) 222 : writer_(writer) {} 223 224 private: 225 STListWriter<Fst<A>, FstWriter<A> > *writer_; 226 227 DISALLOW_COPY_AND_ASSIGN(STListFarWriter); 228 }; 229 230 231 template <class A> 232 class FstFarWriter : public FarWriter<A> { 233 public: 234 typedef A Arc; 235 236 explicit FstFarWriter(const string &filename) 237 : filename_(filename), error_(false), written_(false) {} 238 239 static FstFarWriter *Create(const string &filename) { 240 return new FstFarWriter(filename); 241 } 242 243 void Add(const string &key, const Fst<A> &fst) { 244 if (written_) { 245 LOG(WARNING) << "FstFarWriter::Add: only one Fst supported," 246 << " subsequent entries discarded."; 247 } else { 248 error_ = !fst.Write(filename_); 249 written_ = true; 250 } 251 } 252 253 FarType Type() const { return FAR_FST; } 254 255 bool Error() const { return error_; } 256 257 ~FstFarWriter() {} 258 259 private: 260 string filename_; 261 bool error_; 262 bool written_; 263 264 DISALLOW_COPY_AND_ASSIGN(FstFarWriter); 265 }; 266 267 268 template <class A> 269 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { 270 switch(type) { 271 case FAR_DEFAULT: 272 if (filename.empty()) 273 return STListFarWriter<A>::Create(filename); 274 case FAR_STTABLE: 275 return STTableFarWriter<A>::Create(filename); 276 case FAR_STLIST: 277 return STListFarWriter<A>::Create(filename); 278 case FAR_FST: 279 return FstFarWriter<A>::Create(filename); 280 default: 281 LOG(ERROR) << "FarWriter::Create: unknown far type"; 282 return 0; 283 } 284 } 285 286 287 template <class A> 288 class FstReader { 289 public: 290 Fst<A> *operator()(istream &strm) const { 291 return Fst<A>::Read(strm, FstReadOptions()); 292 } 293 }; 294 295 296 template <class A> 297 class STTableFarReader : public FarReader<A> { 298 public: 299 typedef A Arc; 300 301 static STTableFarReader *Open(const string &filename) { 302 STTableReader<Fst<A>, FstReader<A> > *reader = 303 STTableReader<Fst<A>, FstReader<A> >::Open(filename); 304 // TODO: error check 305 return new STTableFarReader(reader); 306 } 307 308 static STTableFarReader *Open(const vector<string> &filenames) { 309 STTableReader<Fst<A>, FstReader<A> > *reader = 310 STTableReader<Fst<A>, FstReader<A> >::Open(filenames); 311 // TODO: error check 312 return new STTableFarReader(reader); 313 } 314 315 void Reset() { reader_->Reset(); } 316 317 bool Find(const string &key) { return reader_->Find(key); } 318 319 bool Done() const { return reader_->Done(); } 320 321 void Next() { return reader_->Next(); } 322 323 const string &GetKey() const { return reader_->GetKey(); } 324 325 const Fst<A> &GetFst() const { return reader_->GetEntry(); } 326 327 FarType Type() const { return FAR_STTABLE; } 328 329 bool Error() const { return reader_->Error(); } 330 331 ~STTableFarReader() { delete reader_; } 332 333 private: 334 explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) 335 : reader_(reader) {} 336 337 private: 338 STTableReader<Fst<A>, FstReader<A> > *reader_; 339 340 DISALLOW_COPY_AND_ASSIGN(STTableFarReader); 341 }; 342 343 344 template <class A> 345 class STListFarReader : public FarReader<A> { 346 public: 347 typedef A Arc; 348 349 static STListFarReader *Open(const string &filename) { 350 STListReader<Fst<A>, FstReader<A> > *reader = 351 STListReader<Fst<A>, FstReader<A> >::Open(filename); 352 // TODO: error check 353 return new STListFarReader(reader); 354 } 355 356 static STListFarReader *Open(const vector<string> &filenames) { 357 STListReader<Fst<A>, FstReader<A> > *reader = 358 STListReader<Fst<A>, FstReader<A> >::Open(filenames); 359 // TODO: error check 360 return new STListFarReader(reader); 361 } 362 363 void Reset() { reader_->Reset(); } 364 365 bool Find(const string &key) { return reader_->Find(key); } 366 367 bool Done() const { return reader_->Done(); } 368 369 void Next() { return reader_->Next(); } 370 371 const string &GetKey() const { return reader_->GetKey(); } 372 373 const Fst<A> &GetFst() const { return reader_->GetEntry(); } 374 375 FarType Type() const { return FAR_STLIST; } 376 377 bool Error() const { return reader_->Error(); } 378 379 ~STListFarReader() { delete reader_; } 380 381 private: 382 explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) 383 : reader_(reader) {} 384 385 private: 386 STListReader<Fst<A>, FstReader<A> > *reader_; 387 388 DISALLOW_COPY_AND_ASSIGN(STListFarReader); 389 }; 390 391 template <class A> 392 class FstFarReader : public FarReader<A> { 393 public: 394 typedef A Arc; 395 396 static FstFarReader *Open(const string &filename) { 397 vector<string> filenames; 398 filenames.push_back(filename); 399 return new FstFarReader<A>(filenames); 400 } 401 402 static FstFarReader *Open(const vector<string> &filenames) { 403 return new FstFarReader<A>(filenames); 404 } 405 406 FstFarReader(const vector<string> &filenames) 407 : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) { 408 sort(keys_.begin(), keys_.end()); 409 streams_.resize(keys_.size(), 0); 410 for (size_t i = 0; i < keys_.size(); ++i) { 411 if (keys_[i].empty()) { 412 if (!has_stdin_) { 413 streams_[i] = &cin; 414 //sources_[i] = "stdin"; 415 has_stdin_ = true; 416 } else { 417 FSTERROR() << "FstFarReader::FstFarReader: stdin should only " 418 << "appear once in the input file list."; 419 error_ = true; 420 return; 421 } 422 } else { 423 streams_[i] = new ifstream( 424 keys_[i].c_str(), ifstream::in | ifstream::binary); 425 } 426 } 427 if (pos_ >= keys_.size()) return; 428 ReadFst(); 429 } 430 431 void Reset() { 432 if (has_stdin_) { 433 FSTERROR() << "FstFarReader::Reset: operation not supported on stdin"; 434 error_ = true; 435 return; 436 } 437 pos_ = 0; 438 ReadFst(); 439 } 440 441 bool Find(const string &key) { 442 if (has_stdin_) { 443 FSTERROR() << "FstFarReader::Find: operation not supported on stdin"; 444 error_ = true; 445 return false; 446 } 447 pos_ = 0;//TODO 448 ReadFst(); 449 return true; 450 } 451 452 bool Done() const { return error_ || pos_ >= keys_.size(); } 453 454 void Next() { 455 ++pos_; 456 ReadFst(); 457 } 458 459 const string &GetKey() const { 460 return keys_[pos_]; 461 } 462 463 const Fst<A> &GetFst() const { 464 return *fst_; 465 } 466 467 FarType Type() const { return FAR_FST; } 468 469 bool Error() const { return error_; } 470 471 ~FstFarReader() { 472 if (fst_) delete fst_; 473 for (size_t i = 0; i < keys_.size(); ++i) 474 delete streams_[i]; 475 } 476 477 private: 478 void ReadFst() { 479 if (fst_) delete fst_; 480 if (pos_ >= keys_.size()) return; 481 streams_[pos_]->seekg(0); 482 fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions()); 483 if (!fst_) { 484 FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_]; 485 error_ = true; 486 } 487 } 488 489 private: 490 vector<string> keys_; 491 vector<istream*> streams_; 492 bool has_stdin_; 493 size_t pos_; 494 mutable Fst<A> *fst_; 495 mutable bool error_; 496 497 DISALLOW_COPY_AND_ASSIGN(FstFarReader); 498 }; 499 500 template <class A> 501 FarReader<A> *FarReader<A>::Open(const string &filename) { 502 if (filename.empty()) 503 return STListFarReader<A>::Open(filename); 504 else if (IsSTTable(filename)) 505 return STTableFarReader<A>::Open(filename); 506 else if (IsSTList(filename)) 507 return STListFarReader<A>::Open(filename); 508 else if (IsFst(filename)) 509 return FstFarReader<A>::Open(filename); 510 return 0; 511 } 512 513 514 template <class A> 515 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { 516 if (!filenames.empty() && filenames[0].empty()) 517 return STListFarReader<A>::Open(filenames); 518 else if (!filenames.empty() && IsSTTable(filenames[0])) 519 return STTableFarReader<A>::Open(filenames); 520 else if (!filenames.empty() && IsSTList(filenames[0])) 521 return STListFarReader<A>::Open(filenames); 522 else if (!filenames.empty() && IsFst(filenames[0])) 523 return FstFarReader<A>::Open(filenames); 524 return 0; 525 } 526 527 } // namespace fst 528 529 #endif // FST_EXTENSIONS_FAR_FAR_H__ 530