Home | History | Annotate | Download | only in far
      1 // printstrings-main.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: allauzen (at) google.com (Cyril Allauzen)
     17 // Modified by: jpr (at) google.com (Jake Ratkiewicz)
     18 //
     19 // \file
     20 // Output as strings the string FSTs in a finite-state archive.
     21 
     22 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
     23 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
     24 
     25 #include <string>
     26 #include <vector>
     27 using std::vector;
     28 
     29 #include <fst/extensions/far/far.h>
     30 #include <fst/string.h>
     31 
     32 namespace fst {
     33 
     34 template <class Arc>
     35 void FarPrintStrings(
     36     const vector<string> &ifilenames, const FarEntryType entry_type,
     37     const FarTokenType far_token_type, const string &begin_key,
     38     const string &end_key, const bool print_key, const string &symbols_fname,
     39     const int32 generate_filenames, const string &filename_prefix,
     40     const string &filename_suffix) {
     41 
     42   typename StringPrinter<Arc>::TokenType token_type;
     43   if (far_token_type == FTT_SYMBOL) {
     44     token_type = StringPrinter<Arc>::SYMBOL;
     45   } else if (far_token_type == FTT_BYTE) {
     46     token_type = StringPrinter<Arc>::BYTE;
     47   } else if (far_token_type == FTT_UTF8) {
     48     token_type = StringPrinter<Arc>::UTF8;
     49   } else {
     50     FSTERROR() << "FarPrintStrings: unknown token type";
     51     return;
     52   }
     53 
     54   const SymbolTable *syms = 0;
     55   if (!symbols_fname.empty()) {
     56     // allow negative flag?
     57     syms = SymbolTable::ReadText(symbols_fname, true);
     58     if (!syms) {
     59       FSTERROR() << "FarPrintStrings: error reading symbol table: "
     60                  << symbols_fname;
     61       return;
     62     }
     63   }
     64 
     65   StringPrinter<Arc> string_printer(token_type, syms);
     66 
     67   FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
     68   if (!far_reader) return;
     69 
     70   if (!begin_key.empty())
     71     far_reader->Find(begin_key);
     72 
     73   string okey;
     74   int nrep = 0;
     75   for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
     76     string key = far_reader->GetKey();
     77     if (!end_key.empty() && end_key < key)
     78       break;
     79     if (okey == key)
     80       ++nrep;
     81     else
     82       nrep = 0;
     83     okey = key;
     84 
     85     const Fst<Arc> &fst = far_reader->GetFst();
     86     string str;
     87     VLOG(2) << "Handling key: " << key;
     88     string_printer(fst, &str);
     89 
     90     if (entry_type == FET_LINE) {
     91       if (print_key)
     92         cout << key << "\t";
     93       cout << str << endl;
     94     } else if (entry_type == FET_FILE) {
     95       stringstream sstrm;
     96       if (generate_filenames) {
     97         sstrm.fill('0');
     98         sstrm << std::right << setw(generate_filenames) << i;
     99       } else {
    100         sstrm << key;
    101         if (nrep > 0)
    102           sstrm << "." << nrep;
    103       }
    104 
    105       string filename;
    106       filename = filename_prefix +  sstrm.str() + filename_suffix;
    107 
    108       ofstream ostrm(filename.c_str());
    109       if (!ostrm) {
    110         FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
    111         delete syms;
    112         delete far_reader;
    113         return;
    114       }
    115       ostrm << str;
    116       if (token_type == StringPrinter<Arc>::SYMBOL)
    117         ostrm << "\n";
    118     }
    119   }
    120 }
    121 
    122 
    123 
    124 }  // namespace fst
    125 
    126 #endif  // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
    127