Home | History | Annotate | Download | only in far
      1 // farcompilestrings.cc
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: allauzen (at) google.com (Cyril Allauzen)
     17 // Modified: jpr (at) google.com (Jake Ratkiewicz) to use new arc-type dispatching
     18 //
     19 // \file
     20 // Compiles a set of strings as FSTs and stores them in a finite-state
     21 // archive.
     22 //
     23 
     24 #include <fst/extensions/far/farscript.h>
     25 #include <fst/extensions/far/main.h>
     26 #include <iostream>
     27 #include <fstream>
     28 
     29 DEFINE_string(key_prefix, "", "Prefix to append to keys");
     30 DEFINE_string(key_suffix, "", "Suffix to append to keys");
     31 DEFINE_int32(generate_keys, 0,
     32              "Generate N digit numeric keys (def: use file basenames)");
     33 DEFINE_string(far_type, "default", "FAR file format type: one of: ");
     34 DEFINE_bool(allow_negative_labels, false,
     35             "Allow negative labels (not recommended; may cause conflicts)");
     36 DEFINE_string(arc_type, "standard", "Output arc type");
     37 DEFINE_string(entry_type, "line", "Entry type: one of : "
     38               "\"file\" (one FST per file), \"line\" (one FST per line)");
     39 DEFINE_string(fst_type, "vector", "Output FST type");
     40 DEFINE_string(token_type, "symbol", "Token type: one of : "
     41               "\"symbol\", \"byte\", \"utf8\"");
     42 DEFINE_string(symbols, "", "Label symbol table");
     43 DEFINE_string(unknown_symbol, "", "");
     44 DEFINE_bool(file_list_input, false,
     45             "Each input files contains a list of files to be processed");
     46 
     47 
     48 int  main(int argc, char **argv) {
     49   namespace s = fst::script;
     50 
     51   string usage = "Compiles a set of strings as FSTs and stores them in";
     52   usage += " a finite-state archive.\n\n Usage:";
     53   usage += argv[0];
     54   usage += " in1.txt [in2.txt ...] out.far\n";
     55 
     56   std::set_new_handler(FailedNewHandler);
     57   SetFlags(usage.c_str(), &argc, &argv, true);
     58 
     59   if (argc < 3) {
     60     ShowUsage();
     61     return 1;
     62   }
     63 
     64   vector<string> in_fnames(argc - 2);
     65 
     66   for (unsigned i = 1; i < argc - 1; ++i) {
     67     in_fnames[i - 1] = argv[i];
     68   }
     69 
     70   string out_fname = argv[argc - 1];
     71 
     72   fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type);
     73   fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type);
     74   fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
     75 
     76   s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type,
     77                        far_type, FLAGS_generate_keys, fet, ftt,
     78                        FLAGS_symbols, FLAGS_unknown_symbol,
     79                        FLAGS_allow_negative_labels,
     80                        FLAGS_file_list_input, FLAGS_key_prefix,
     81                        FLAGS_key_suffix);
     82 
     83   return 0;
     84 }
     85