Home | History | Annotate | Download | only in far
      1 // printstrings-main.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: allauzen (at) google.com (Cyril Allauzen)
     17 // Modified by: jpr (at) google.com (Jake Ratkiewicz)
     18 //
     19 // \file
     20 // Output as strings the string FSTs in a finite-state archive.
     21 
     22 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
     23 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
     24 
     25 #include <string>
     26 #include <vector>
     27 using std::vector;
     28 
     29 #include <fst/extensions/far/far.h>
     30 #include <fst/shortest-distance.h>
     31 #include <fst/string.h>
     32 
     33 DECLARE_string(far_field_separator);
     34 
     35 namespace fst {
     36 
     37 template <class Arc>
     38 void FarPrintStrings(
     39     const vector<string> &ifilenames, const FarEntryType entry_type,
     40     const FarTokenType far_token_type, const string &begin_key,
     41     const string &end_key, const bool print_key, const bool print_weight,
     42     const string &symbols_fname, const bool initial_symbols,
     43     const int32 generate_filenames,
     44     const string &filename_prefix, const string &filename_suffix) {
     45 
     46   typename StringPrinter<Arc>::TokenType token_type;
     47   if (far_token_type == FTT_SYMBOL) {
     48     token_type = StringPrinter<Arc>::SYMBOL;
     49   } else if (far_token_type == FTT_BYTE) {
     50     token_type = StringPrinter<Arc>::BYTE;
     51   } else if (far_token_type == FTT_UTF8) {
     52     token_type = StringPrinter<Arc>::UTF8;
     53   } else {
     54     FSTERROR() << "FarPrintStrings: unknown token type";
     55     return;
     56   }
     57 
     58   const SymbolTable *syms = 0;
     59   if (!symbols_fname.empty()) {
     60     // allow negative flag?
     61     SymbolTableTextOptions opts;
     62     opts.allow_negative = true;
     63     syms = SymbolTable::ReadText(symbols_fname, opts);
     64     if (!syms) {
     65       FSTERROR() << "FarPrintStrings: error reading symbol table: "
     66                  << symbols_fname;
     67       return;
     68     }
     69   }
     70 
     71   FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
     72   if (!far_reader) return;
     73 
     74   if (!begin_key.empty())
     75     far_reader->Find(begin_key);
     76 
     77   string okey;
     78   int nrep = 0;
     79   for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
     80     string key = far_reader->GetKey();
     81     if (!end_key.empty() && end_key < key)
     82       break;
     83     if (okey == key)
     84       ++nrep;
     85     else
     86       nrep = 0;
     87     okey = key;
     88 
     89     const Fst<Arc> &fst = far_reader->GetFst();
     90     if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
     91       syms = fst.InputSymbols()->Copy();
     92     string str;
     93     VLOG(2) << "Handling key: " << key;
     94     StringPrinter<Arc> string_printer(
     95         token_type, syms ? syms : fst.InputSymbols());
     96     string_printer(fst, &str);
     97 
     98     if (entry_type == FET_LINE) {
     99       if (print_key)
    100         cout << key << FLAGS_far_field_separator[0];
    101       cout << str;
    102       if (print_weight)
    103         cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
    104       cout << endl;
    105     } else if (entry_type == FET_FILE) {
    106       stringstream sstrm;
    107       if (generate_filenames) {
    108         sstrm.fill('0');
    109         sstrm << std::right << setw(generate_filenames) << i;
    110       } else {
    111         sstrm << key;
    112         if (nrep > 0)
    113           sstrm << "." << nrep;
    114       }
    115 
    116       string filename;
    117       filename = filename_prefix +  sstrm.str() + filename_suffix;
    118 
    119       ofstream ostrm(filename.c_str());
    120       if (!ostrm) {
    121         FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
    122         delete syms;
    123         delete far_reader;
    124         return;
    125       }
    126       ostrm << str;
    127       if (token_type == StringPrinter<Arc>::SYMBOL)
    128         ostrm << "\n";
    129     }
    130   }
    131   delete syms;
    132 }
    133 
    134 
    135 
    136 }  // namespace fst
    137 
    138 #endif  // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
    139