1 // fst.cc 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // FST definitions. 20 21 #include <fst/fst.h> 22 23 // Include these so they are registered 24 #include <fst/compact-fst.h> 25 #include <fst/const-fst.h> 26 #include <fst/matcher-fst.h> 27 #include <fst/vector-fst.h> 28 #include <fst/edit-fst.h> 29 30 // FST flag definitions 31 32 DEFINE_bool(fst_verify_properties, false, 33 "Verify fst properties queried by TestProperties"); 34 35 DEFINE_string(fst_weight_separator, ",", 36 "Character separator between printed composite weights; " 37 "must be a single character"); 38 39 DEFINE_string(fst_weight_parentheses, "", 40 "Characters enclosing the first weight of a printed composite " 41 "weight (e.g. pair weight, tuple weight and derived classes) to " 42 "ensure proper I/O of nested composite weights; " 43 "must have size 0 (none) or 2 (open and close parenthesis)"); 44 45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache"); 46 47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL, 48 "Cache byte size that triggers garbage collection"); 49 50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); 51 52 DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); 53 DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); 54 55 namespace fst { 56 57 // Register VectorFst, ConstFst and EditFst for common arcs types 58 REGISTER_FST(VectorFst, StdArc); 59 REGISTER_FST(VectorFst, LogArc); 60 REGISTER_FST(VectorFst, Log64Arc); 61 REGISTER_FST(ConstFst, StdArc); 62 REGISTER_FST(ConstFst, LogArc); 63 REGISTER_FST(ConstFst, Log64Arc); 64 REGISTER_FST(EditFst, StdArc); 65 REGISTER_FST(EditFst, LogArc); 66 REGISTER_FST(EditFst, Log64Arc); 67 68 // Register CompactFst for common arcs with the default (uint32) size type 69 static FstRegisterer< 70 CompactFst<StdArc, StringCompactor<StdArc> > > 71 CompactFst_StdArc_StringCompactor_registerer; 72 static FstRegisterer< 73 CompactFst<LogArc, StringCompactor<LogArc> > > 74 CompactFst_LogArc_StringCompactor_registerer; 75 static FstRegisterer< 76 CompactFst<StdArc, WeightedStringCompactor<StdArc> > > 77 CompactFst_StdArc_WeightedStringCompactor_registerer; 78 static FstRegisterer< 79 CompactFst<LogArc, WeightedStringCompactor<LogArc> > > 80 CompactFst_LogArc_WeightedStringCompactor_registerer; 81 static FstRegisterer< 82 CompactFst<StdArc, AcceptorCompactor<StdArc> > > 83 CompactFst_StdArc_AcceptorCompactor_registerer; 84 static FstRegisterer< 85 CompactFst<LogArc, AcceptorCompactor<LogArc> > > 86 CompactFst_LogArc_AcceptorCompactor_registerer; 87 static FstRegisterer< 88 CompactFst<StdArc, UnweightedCompactor<StdArc> > > 89 CompactFst_StdArc_UnweightedCompactor_registerer; 90 static FstRegisterer< 91 CompactFst<LogArc, UnweightedCompactor<LogArc> > > 92 CompactFst_LogArc_UnweightedCompactor_registerer; 93 static FstRegisterer< 94 CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > > 95 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer; 96 static FstRegisterer< 97 CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > > 98 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer; 99 100 // Fst type definitions for lookahead Fsts. 101 extern const char arc_lookahead_fst_type[] = "arc_lookahead"; 102 extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead"; 103 extern const char olabel_lookahead_fst_type[] = "olabel_lookahead"; 104 105 // Identifies stream data as an FST (and its endianity) 106 static const int32 kFstMagicNumber = 2125659606; 107 108 // Check for Fst magic number in stream, to indicate 109 // caller function that the stream content is an Fst header; 110 bool IsFstHeader(istream &strm, const string &source) { 111 int64 pos = strm.tellg(); 112 bool match = true; 113 int32 magic_number = 0; 114 ReadType(strm, &magic_number); 115 if (magic_number != kFstMagicNumber 116 ) { 117 match = false; 118 } 119 strm.seekg(pos); 120 return match; 121 } 122 123 // Check Fst magic number and read in Fst header. 124 // If rewind = true, reposition stream to before call (if possible). 125 bool FstHeader::Read(istream &strm, const string &source, bool rewind) { 126 int64 pos = 0; 127 if (rewind) pos = strm.tellg(); 128 int32 magic_number = 0; 129 ReadType(strm, &magic_number); 130 if (magic_number != kFstMagicNumber 131 ) { 132 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source; 133 if (rewind) strm.seekg(pos); 134 return false; 135 } 136 137 ReadType(strm, &fsttype_); 138 ReadType(strm, &arctype_); 139 ReadType(strm, &version_); 140 ReadType(strm, &flags_); 141 ReadType(strm, &properties_); 142 ReadType(strm, &start_); 143 ReadType(strm, &numstates_); 144 ReadType(strm, &numarcs_); 145 if (!strm) { 146 LOG(ERROR) << "FstHeader::Read: read failed: " << source; 147 return false; 148 } 149 if (rewind) strm.seekg(pos); 150 return true; 151 } 152 153 // Write Fst magic number and Fst header. 154 bool FstHeader::Write(ostream &strm, const string &source) const { 155 WriteType(strm, kFstMagicNumber); 156 WriteType(strm, fsttype_); 157 WriteType(strm, arctype_); 158 WriteType(strm, version_); 159 WriteType(strm, flags_); 160 WriteType(strm, properties_); 161 WriteType(strm, start_); 162 WriteType(strm, numstates_); 163 WriteType(strm, numarcs_); 164 return true; 165 } 166 167 } // namespace fst 168