// farcompilestrings.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen (at) google.com (Cyril Allauzen)
// Modified: jpr (at) google.com (Jake Ratkiewicz) to use new arc-type dispatching
//
// \file
// Compiles a set of strings as FSTs and stores them in a finite-state
// archive.
//

#include <fst/extensions/far/farscript.h>
#include <fst/extensions/far/main.h>
#include <iostream>
#include <fstream>

// Command-line flags. Each FLAGS_<name> value is forwarded by main() to
// fst::script::FarCompileStrings, either directly or after conversion to the
// corresponding FAR enum type.

// Text attached to the generated keys.
// NOTE(review): the help text says "append" for the prefix as well; presumably
// the prefix is prepended -- confirm against FarCompileStrings.
DEFINE_string(key_prefix, "", "Prefix to append to keys");
DEFINE_string(key_suffix, "", "Suffix to append to keys");
DEFINE_int32(generate_keys, 0,
             "Generate N digit numeric keys (def: use file basenames)");
// Converted with fst::FarTypeFromString in main().
// NOTE(review): the help text ends at "one of: " without listing the accepted
// values -- TODO fill in from the FAR library.
DEFINE_string(far_type, "default", "FAR file format type: one of: ");
DEFINE_bool(allow_negative_labels, false,
            "Allow negative labels (not recommended; may cause conflicts)");
DEFINE_string(arc_type, "standard", "Output arc type");
// Converted with fst::StringToFarEntryType in main().
DEFINE_string(entry_type, "line", "Entry type: one of : "
              "\"file\" (one FST per file), \"line\" (one FST per line)");
DEFINE_string(fst_type, "vector", "Output FST type");
// Converted with fst::StringToFarTokenType in main().
DEFINE_string(token_type, "symbol", "Token type: one of : "
              "\"symbol\", \"byte\", \"utf8\"");
DEFINE_string(symbols, "", "Label symbol table");
// NOTE(review): this flag has no help text; presumably it names the symbol
// used for tokens missing from the symbol table -- confirm and document.
DEFINE_string(unknown_symbol, "", "");
// NOTE(review): help text has a grammar slip ("Each input files contains").
DEFINE_bool(file_list_input, false,
            "Each input files contains a list of files to be processed");


int main(int argc, char **argv) { 49 namespace s = fst::script; 50 51 string usage = "Compiles a set of strings as FSTs and stores them in"; 52 usage += " a finite-state archive.\n\n Usage:"; 53 usage += argv[0]; 54 usage += " in1.txt [in2.txt ...] out.far\n"; 55 56 std::set_new_handler(FailedNewHandler); 57 SetFlags(usage.c_str(), &argc, &argv, true); 58 59 if (argc < 3) { 60 ShowUsage(); 61 return 1; 62 } 63 64 vector<string> in_fnames(argc - 2); 65 66 for (unsigned i = 1; i < argc - 1; ++i) { 67 in_fnames[i - 1] = argv[i]; 68 } 69 70 string out_fname = argv[argc - 1]; 71 72 fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type); 73 fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type); 74 fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type); 75 76 s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type, 77 far_type, FLAGS_generate_keys, fet, ftt, 78 FLAGS_symbols, FLAGS_unknown_symbol, 79 FLAGS_allow_negative_labels, 80 FLAGS_file_list_input, FLAGS_key_prefix, 81 FLAGS_key_suffix); 82 83 return 0; 84 } 85