1 // far.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Finite-State Transducer (FST) archive classes. 20 // 21 22 #ifndef FST_EXTENSIONS_FAR_FAR_H__ 23 #define FST_EXTENSIONS_FAR_FAR_H__ 24 25 #include <fst/extensions/far/stlist.h> 26 #include <fst/extensions/far/sttable.h> 27 #include <fst/fst.h> 28 #include <fst/vector-fst.h> 29 30 namespace fst { 31 32 enum FarEntryType { FET_LINE, FET_FILE }; 33 enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; 34 35 // FST archive header class 36 class FarHeader { 37 public: 38 const string &FarType() const { return fartype_; } 39 const string &ArcType() const { return arctype_; } 40 41 bool Read(const string &filename) { 42 FstHeader fsthdr; 43 if (filename.empty()) { // Header reading unsupported on stdin. 44 return false; 45 } else if (IsSTTable(filename)) { // Check if STTable 46 ReadSTTableHeader(filename, &fsthdr); 47 fartype_ = "sttable"; 48 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); 49 return true; 50 } else if (IsSTList(filename)) { // Check if STList 51 ReadSTListHeader(filename, &fsthdr); 52 fartype_ = "sttable"; 53 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); 54 return true; 55 } 56 return false; 57 } 58 59 private: 60 string fartype_; 61 string arctype_; 62 }; 63 64 enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, 65 FAR_SSTABLE = 3 }; 66 67 // This class creates an archive of FSTs. 68 template <class A> 69 class FarWriter { 70 public: 71 typedef A Arc; 72 73 // Creates a new (empty) FST archive; returns NULL on error. 74 static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); 75 76 // Adds an FST to the end of an archive. Keys must be non-empty and 77 // in lexicographic order. FSTs must have a suitable write method. 78 virtual void Add(const string &key, const Fst<A> &fst) = 0; 79 80 virtual FarType Type() const = 0; 81 82 virtual bool Error() const = 0; 83 84 virtual ~FarWriter() {} 85 86 protected: 87 FarWriter() {} 88 89 private: 90 DISALLOW_COPY_AND_ASSIGN(FarWriter); 91 }; 92 93 94 // This class iterates through an existing archive of FSTs. 95 template <class A> 96 class FarReader { 97 public: 98 typedef A Arc; 99 100 // Opens an existing FST archive in a single file; returns NULL on error. 101 // Sets current position to the beginning of the achive. 102 static FarReader *Open(const string &filename); 103 104 // Opens an existing FST archive in multiple files; returns NULL on error. 105 // Sets current position to the beginning of the achive. 106 static FarReader *Open(const vector<string> &filenames); 107 108 // Resets current posision to beginning of archive. 109 virtual void Reset() = 0; 110 111 // Sets current position to first entry >= key. Returns true if a match. 112 virtual bool Find(const string &key) = 0; 113 114 // Current position at end of archive? 115 virtual bool Done() const = 0; 116 117 // Move current position to next FST. 118 virtual void Next() = 0; 119 120 // Returns key at the current position. This reference is invalidated if 121 // the current position in the archive is changed. 122 virtual const string &GetKey() const = 0; 123 124 // Returns FST at the current position. This reference is invalidated if 125 // the current position in the archive is changed. 126 virtual const Fst<A> &GetFst() const = 0; 127 128 virtual FarType Type() const = 0; 129 130 virtual bool Error() const = 0; 131 132 virtual ~FarReader() {} 133 134 protected: 135 FarReader() {} 136 137 private: 138 DISALLOW_COPY_AND_ASSIGN(FarReader); 139 }; 140 141 142 template <class A> 143 class FstWriter { 144 public: 145 void operator()(ostream &strm, const Fst<A> &fst) const { 146 fst.Write(strm, FstWriteOptions()); 147 } 148 }; 149 150 151 template <class A> 152 class STTableFarWriter : public FarWriter<A> { 153 public: 154 typedef A Arc; 155 156 static STTableFarWriter *Create(const string filename) { 157 STTableWriter<Fst<A>, FstWriter<A> > *writer = 158 STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); 159 return new STTableFarWriter(writer); 160 } 161 162 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } 163 164 FarType Type() const { return FAR_STTABLE; } 165 166 bool Error() const { return writer_->Error(); } 167 168 ~STTableFarWriter() { delete writer_; } 169 170 private: 171 explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer) 172 : writer_(writer) {} 173 174 private: 175 STTableWriter<Fst<A>, FstWriter<A> > *writer_; 176 177 DISALLOW_COPY_AND_ASSIGN(STTableFarWriter); 178 }; 179 180 181 template <class A> 182 class STListFarWriter : public FarWriter<A> { 183 public: 184 typedef A Arc; 185 186 static STListFarWriter *Create(const string filename) { 187 STListWriter<Fst<A>, FstWriter<A> > *writer = 188 STListWriter<Fst<A>, FstWriter<A> >::Create(filename); 189 return new STListFarWriter(writer); 190 } 191 192 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } 193 194 FarType Type() const { return FAR_STLIST; } 195 196 bool Error() const { return writer_->Error(); } 197 198 ~STListFarWriter() { delete writer_; } 199 200 private: 201 explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer) 202 : writer_(writer) {} 203 204 private: 205 STListWriter<Fst<A>, FstWriter<A> > *writer_; 206 207 DISALLOW_COPY_AND_ASSIGN(STListFarWriter); 208 }; 209 210 211 template <class A> 212 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { 213 switch(type) { 214 case FAR_DEFAULT: 215 if (filename.empty()) 216 return STListFarWriter<A>::Create(filename); 217 case FAR_STTABLE: 218 return STTableFarWriter<A>::Create(filename); 219 break; 220 case FAR_STLIST: 221 return STListFarWriter<A>::Create(filename); 222 break; 223 default: 224 LOG(ERROR) << "FarWriter::Create: unknown far type"; 225 return 0; 226 } 227 } 228 229 230 template <class A> 231 class FstReader { 232 public: 233 Fst<A> *operator()(istream &strm) const { 234 return Fst<A>::Read(strm, FstReadOptions()); 235 } 236 }; 237 238 239 template <class A> 240 class STTableFarReader : public FarReader<A> { 241 public: 242 typedef A Arc; 243 244 static STTableFarReader *Open(const string &filename) { 245 STTableReader<Fst<A>, FstReader<A> > *reader = 246 STTableReader<Fst<A>, FstReader<A> >::Open(filename); 247 // TODO: error check 248 return new STTableFarReader(reader); 249 } 250 251 static STTableFarReader *Open(const vector<string> &filenames) { 252 STTableReader<Fst<A>, FstReader<A> > *reader = 253 STTableReader<Fst<A>, FstReader<A> >::Open(filenames); 254 // TODO: error check 255 return new STTableFarReader(reader); 256 } 257 258 void Reset() { reader_->Reset(); } 259 260 bool Find(const string &key) { return reader_->Find(key); } 261 262 bool Done() const { return reader_->Done(); } 263 264 void Next() { return reader_->Next(); } 265 266 const string &GetKey() const { return reader_->GetKey(); } 267 268 const Fst<A> &GetFst() const { return reader_->GetEntry(); } 269 270 FarType Type() const { return FAR_STTABLE; } 271 272 bool Error() const { return reader_->Error(); } 273 274 ~STTableFarReader() { delete reader_; } 275 276 private: 277 explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) 278 : reader_(reader) {} 279 280 private: 281 STTableReader<Fst<A>, FstReader<A> > *reader_; 282 283 DISALLOW_COPY_AND_ASSIGN(STTableFarReader); 284 }; 285 286 287 template <class A> 288 class STListFarReader : public FarReader<A> { 289 public: 290 typedef A Arc; 291 292 static STListFarReader *Open(const string &filename) { 293 STListReader<Fst<A>, FstReader<A> > *reader = 294 STListReader<Fst<A>, FstReader<A> >::Open(filename); 295 // TODO: error check 296 return new STListFarReader(reader); 297 } 298 299 static STListFarReader *Open(const vector<string> &filenames) { 300 STListReader<Fst<A>, FstReader<A> > *reader = 301 STListReader<Fst<A>, FstReader<A> >::Open(filenames); 302 // TODO: error check 303 return new STListFarReader(reader); 304 } 305 306 void Reset() { reader_->Reset(); } 307 308 bool Find(const string &key) { return reader_->Find(key); } 309 310 bool Done() const { return reader_->Done(); } 311 312 void Next() { return reader_->Next(); } 313 314 const string &GetKey() const { return reader_->GetKey(); } 315 316 const Fst<A> &GetFst() const { return reader_->GetEntry(); } 317 318 FarType Type() const { return FAR_STLIST; } 319 320 bool Error() const { return reader_->Error(); } 321 322 ~STListFarReader() { delete reader_; } 323 324 private: 325 explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) 326 : reader_(reader) {} 327 328 private: 329 STListReader<Fst<A>, FstReader<A> > *reader_; 330 331 DISALLOW_COPY_AND_ASSIGN(STListFarReader); 332 }; 333 334 335 template <class A> 336 FarReader<A> *FarReader<A>::Open(const string &filename) { 337 if (filename.empty()) 338 return STListFarReader<A>::Open(filename); 339 else if (IsSTTable(filename)) 340 return STTableFarReader<A>::Open(filename); 341 else if (IsSTList(filename)) 342 return STListFarReader<A>::Open(filename); 343 return 0; 344 } 345 346 347 template <class A> 348 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { 349 if (!filenames.empty() && filenames[0].empty()) 350 return STListFarReader<A>::Open(filenames); 351 else if (!filenames.empty() && IsSTTable(filenames[0])) 352 return STTableFarReader<A>::Open(filenames); 353 else if (!filenames.empty() && IsSTList(filenames[0])) 354 return STListFarReader<A>::Open(filenames); 355 return 0; 356 } 357 358 } // namespace fst 359 360 #endif // FST_EXTENSIONS_FAR_FAR_H__ 361