1 // fst.cc 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // FST definitions. 20 21 #include <fst/fst.h> 22 23 // Include these so they are registered 24 #include <fst/compact-fst.h> 25 #include <fst/const-fst.h> 26 #include <fst/matcher-fst.h> 27 #include <fst/vector-fst.h> 28 #include <fst/edit-fst.h> 29 30 // FST flag definitions 31 32 DEFINE_bool(fst_verify_properties, false, 33 "Verify fst properties queried by TestProperties"); 34 35 DEFINE_string(fst_weight_separator, ",", 36 "Character separator between printed composite weights; " 37 "must be a single character"); 38 39 DEFINE_string(fst_weight_parentheses, "", 40 "Characters enclosing the first weight of a printed composite " 41 "weight (e.g. pair weight, tuple weight and derived classes) to " 42 "ensure proper I/O of nested composite weights; " 43 "must have size 0 (none) or 2 (open and close parenthesis)"); 44 45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache"); 46 47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL, 48 "Cache byte size that triggers garbage collection"); 49 50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); 51 52 DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); 53 DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); 54 55 DEFINE_string(fst_read_mode, "read", 56 "Default file reading mode for mappable files"); 57 58 namespace fst { 59 60 // Register VectorFst, ConstFst and EditFst for common arcs types 61 REGISTER_FST(VectorFst, StdArc); 62 REGISTER_FST(VectorFst, LogArc); 63 REGISTER_FST(VectorFst, Log64Arc); 64 REGISTER_FST(ConstFst, StdArc); 65 REGISTER_FST(ConstFst, LogArc); 66 REGISTER_FST(ConstFst, Log64Arc); 67 REGISTER_FST(EditFst, StdArc); 68 REGISTER_FST(EditFst, LogArc); 69 REGISTER_FST(EditFst, Log64Arc); 70 71 // Register CompactFst for common arcs with the default (uint32) size type 72 static FstRegisterer< 73 CompactFst<StdArc, StringCompactor<StdArc> > > 74 CompactFst_StdArc_StringCompactor_registerer; 75 static FstRegisterer< 76 CompactFst<LogArc, StringCompactor<LogArc> > > 77 CompactFst_LogArc_StringCompactor_registerer; 78 static FstRegisterer< 79 CompactFst<StdArc, WeightedStringCompactor<StdArc> > > 80 CompactFst_StdArc_WeightedStringCompactor_registerer; 81 static FstRegisterer< 82 CompactFst<LogArc, WeightedStringCompactor<LogArc> > > 83 CompactFst_LogArc_WeightedStringCompactor_registerer; 84 static FstRegisterer< 85 CompactFst<StdArc, AcceptorCompactor<StdArc> > > 86 CompactFst_StdArc_AcceptorCompactor_registerer; 87 static FstRegisterer< 88 CompactFst<LogArc, AcceptorCompactor<LogArc> > > 89 CompactFst_LogArc_AcceptorCompactor_registerer; 90 static FstRegisterer< 91 CompactFst<StdArc, UnweightedCompactor<StdArc> > > 92 CompactFst_StdArc_UnweightedCompactor_registerer; 93 static FstRegisterer< 94 CompactFst<LogArc, UnweightedCompactor<LogArc> > > 95 CompactFst_LogArc_UnweightedCompactor_registerer; 96 static FstRegisterer< 97 CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > > 98 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer; 99 static FstRegisterer< 100 CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > > 101 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer; 102 103 // Fst type definitions for lookahead Fsts. 104 extern const char arc_lookahead_fst_type[] = "arc_lookahead"; 105 extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead"; 106 extern const char olabel_lookahead_fst_type[] = "olabel_lookahead"; 107 108 // Identifies stream data as an FST (and its endianity) 109 static const int32 kFstMagicNumber = 2125659606; 110 111 // Check for Fst magic number in stream, to indicate 112 // caller function that the stream content is an Fst header; 113 bool IsFstHeader(istream &strm, const string &source) { 114 int64 pos = strm.tellg(); 115 bool match = true; 116 int32 magic_number = 0; 117 ReadType(strm, &magic_number); 118 if (magic_number != kFstMagicNumber 119 ) { 120 match = false; 121 } 122 strm.seekg(pos); 123 return match; 124 } 125 126 // Check Fst magic number and read in Fst header. 127 // If rewind = true, reposition stream to before call (if possible). 128 bool FstHeader::Read(istream &strm, const string &source, bool rewind) { 129 int64 pos = 0; 130 if (rewind) pos = strm.tellg(); 131 int32 magic_number = 0; 132 ReadType(strm, &magic_number); 133 if (magic_number != kFstMagicNumber 134 ) { 135 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source; 136 if (rewind) strm.seekg(pos); 137 return false; 138 } 139 140 ReadType(strm, &fsttype_); 141 ReadType(strm, &arctype_); 142 ReadType(strm, &version_); 143 ReadType(strm, &flags_); 144 ReadType(strm, &properties_); 145 ReadType(strm, &start_); 146 ReadType(strm, &numstates_); 147 ReadType(strm, &numarcs_); 148 if (!strm) { 149 LOG(ERROR) << "FstHeader::Read: read failed: " << source; 150 return false; 151 } 152 if (rewind) strm.seekg(pos); 153 return true; 154 } 155 156 // Write Fst magic number and Fst header. 157 bool FstHeader::Write(ostream &strm, const string &source) const { 158 WriteType(strm, kFstMagicNumber); 159 WriteType(strm, fsttype_); 160 WriteType(strm, arctype_); 161 WriteType(strm, version_); 162 WriteType(strm, flags_); 163 WriteType(strm, properties_); 164 WriteType(strm, start_); 165 WriteType(strm, numstates_); 166 WriteType(strm, numarcs_); 167 return true; 168 } 169 170 FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr, 171 const SymbolTable* isym, const SymbolTable* osym) 172 : source(src), header(hdr), isymbols(isym), osymbols(osym) { 173 mode = ReadMode(FLAGS_fst_read_mode); 174 } 175 176 FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym, 177 const SymbolTable* osym) 178 : source(src), header(0), isymbols(isym), osymbols(osym) { 179 mode = ReadMode(FLAGS_fst_read_mode); 180 } 181 182 FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) { 183 if (mode == "read") { 184 return READ; 185 } 186 if (mode == "map") { 187 return MAP; 188 } 189 LOG(ERROR) << "Unknown file read mode " << mode; 190 return READ; 191 } 192 193 } // namespace fst 194