1 // printstrings-main.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: allauzen (at) google.com (Cyril Allauzen) 17 // Modified by: jpr (at) google.com (Jake Ratkiewicz) 18 // 19 // \file 20 // Output as strings the string FSTs in a finite-state archive. 21 22 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 23 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 24 25 #include <string> 26 #include <vector> 27 using std::vector; 28 29 #include <fst/extensions/far/far.h> 30 #include <fst/string.h> 31 32 namespace fst { 33 34 template <class Arc> 35 void FarPrintStrings( 36 const vector<string> &ifilenames, const FarEntryType entry_type, 37 const FarTokenType far_token_type, const string &begin_key, 38 const string &end_key, const bool print_key, const string &symbols_fname, 39 const int32 generate_filenames, const string &filename_prefix, 40 const string &filename_suffix) { 41 42 typename StringPrinter<Arc>::TokenType token_type; 43 if (far_token_type == FTT_SYMBOL) { 44 token_type = StringPrinter<Arc>::SYMBOL; 45 } else if (far_token_type == FTT_BYTE) { 46 token_type = StringPrinter<Arc>::BYTE; 47 } else if (far_token_type == FTT_UTF8) { 48 token_type = StringPrinter<Arc>::UTF8; 49 } else { 50 FSTERROR() << "FarPrintStrings: unknown token type"; 51 return; 52 } 53 54 const SymbolTable *syms = 0; 55 if (!symbols_fname.empty()) { 56 // allow negative flag? 57 syms = SymbolTable::ReadText(symbols_fname, true); 58 if (!syms) { 59 FSTERROR() << "FarPrintStrings: error reading symbol table: " 60 << symbols_fname; 61 return; 62 } 63 } 64 65 StringPrinter<Arc> string_printer(token_type, syms); 66 67 FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); 68 if (!far_reader) return; 69 70 if (!begin_key.empty()) 71 far_reader->Find(begin_key); 72 73 string okey; 74 int nrep = 0; 75 for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { 76 string key = far_reader->GetKey(); 77 if (!end_key.empty() && end_key < key) 78 break; 79 if (okey == key) 80 ++nrep; 81 else 82 nrep = 0; 83 okey = key; 84 85 const Fst<Arc> &fst = far_reader->GetFst(); 86 string str; 87 VLOG(2) << "Handling key: " << key; 88 string_printer(fst, &str); 89 90 if (entry_type == FET_LINE) { 91 if (print_key) 92 cout << key << "\t"; 93 cout << str << endl; 94 } else if (entry_type == FET_FILE) { 95 stringstream sstrm; 96 if (generate_filenames) { 97 sstrm.fill('0'); 98 sstrm << std::right << setw(generate_filenames) << i; 99 } else { 100 sstrm << key; 101 if (nrep > 0) 102 sstrm << "." << nrep; 103 } 104 105 string filename; 106 filename = filename_prefix + sstrm.str() + filename_suffix; 107 108 ofstream ostrm(filename.c_str()); 109 if (!ostrm) { 110 FSTERROR() << "FarPrintStrings: Can't open file:" << filename; 111 delete syms; 112 delete far_reader; 113 return; 114 } 115 ostrm << str; 116 if (token_type == StringPrinter<Arc>::SYMBOL) 117 ostrm << "\n"; 118 } 119 } 120 } 121 122 123 124 } // namespace fst 125 126 #endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 127