1 // info.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Class to compute various information about FSTs, helper class for fstinfo.cc 20 21 #ifndef FST_SCRIPT_INFO_IMPL_H_ 22 #define FST_SCRIPT_INFO_IMPL_H_ 23 24 #include <string> 25 #include <vector> 26 using std::vector; 27 28 #include <fst/connect.h> 29 #include <fst/dfs-visit.h> 30 #include <fst/fst.h> 31 #include <fst/lookahead-matcher.h> 32 #include <fst/matcher.h> 33 #include <fst/queue.h> 34 #include <fst/test-properties.h> 35 #include <fst/verify.h> 36 #include <fst/visit.h> 37 38 namespace fst { 39 40 // Compute various information about FSTs, helper class for fstinfo.cc. 41 // WARNING: Stand-alone use of this class is not recommended, most code 42 // should call directly the relevant library functions: Fst<A>::NumStates, 43 // Fst<A>::NumArcs, TestProperties, ... 44 template <class A> class FstInfo { 45 public: 46 typedef A Arc; 47 typedef typename A::StateId StateId; 48 typedef typename A::Label Label; 49 typedef typename A::Weight Weight; 50 51 // When info_type is "short" (or "auto" and not an ExpandedFst) 52 // then only minimal info is computed and can be requested. 53 FstInfo(const Fst<A> &fst, bool test_properties, 54 const string &arc_filter_type = "any", 55 string info_type = "auto", bool verify = true) 56 : fst_type_(fst.Type()), 57 input_symbols_(fst.InputSymbols() ? 58 fst.InputSymbols()->Name() : "none"), 59 output_symbols_(fst.OutputSymbols() ? 60 fst.OutputSymbols()->Name() : "none"), 61 nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0), 62 nepsilons_(0), niepsilons_(0), noepsilons_(0), 63 naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0), 64 input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE), 65 input_lookahead_(false), output_lookahead_(false), 66 properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) { 67 if (info_type == "long") { 68 long_info_ = true; 69 } else if (info_type == "short") { 70 long_info_ = false; 71 } else if (info_type == "auto") { 72 long_info_ = fst.Properties(kExpanded, false); 73 } else { 74 FSTERROR() << "Bad info type: " << info_type; 75 return; 76 } 77 78 if (!long_info_) 79 return; 80 81 // If the FST is not sane, we return. 82 if (verify && !Verify(fst)) { 83 FSTERROR() << "FstInfo: Verify: FST not well-formed."; 84 return; 85 } 86 87 start_ = fst.Start(); 88 properties_ = fst.Properties(kFstProperties, test_properties); 89 90 for (StateIterator< Fst<A> > siter(fst); 91 !siter.Done(); 92 siter.Next()) { 93 ++nstates_; 94 StateId s = siter.Value(); 95 if (fst.Final(s) != Weight::Zero()) 96 ++nfinal_; 97 for (ArcIterator< Fst<A> > aiter(fst, s); 98 !aiter.Done(); 99 aiter.Next()) { 100 const A &arc = aiter.Value(); 101 ++narcs_; 102 if (arc.ilabel == 0 && arc.olabel == 0) 103 ++nepsilons_; 104 if (arc.ilabel == 0) 105 ++niepsilons_; 106 if (arc.olabel == 0) 107 ++noepsilons_; 108 } 109 } 110 111 { 112 vector<StateId> cc; 113 CcVisitor<Arc> cc_visitor(&cc); 114 FifoQueue<StateId> fifo_queue; 115 if (arc_filter_type == "any") { 116 Visit(fst, &cc_visitor, &fifo_queue); 117 } else if (arc_filter_type == "epsilon") { 118 Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>()); 119 } else if (arc_filter_type == "iepsilon") { 120 Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>()); 121 } else if (arc_filter_type == "oepsilon") { 122 Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>()); 123 } else { 124 FSTERROR() << "Bad arc filter type: " << arc_filter_type; 125 return; 126 } 127 128 for (StateId s = 0; s < cc.size(); ++s) { 129 if (cc[s] >= ncc_) 130 ncc_ = cc[s] + 1; 131 } 132 } 133 134 { 135 vector<StateId> scc; 136 vector<bool> access, coaccess; 137 uint64 props = 0; 138 SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props); 139 if (arc_filter_type == "any") { 140 DfsVisit(fst, &scc_visitor); 141 } else if (arc_filter_type == "epsilon") { 142 DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>()); 143 } else if (arc_filter_type == "iepsilon") { 144 DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>()); 145 } else if (arc_filter_type == "oepsilon") { 146 DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>()); 147 } else { 148 FSTERROR() << "Bad arc filter type: " << arc_filter_type; 149 return; 150 } 151 152 for (StateId s = 0; s < scc.size(); ++s) { 153 if (access[s]) 154 ++naccess_; 155 if (coaccess[s]) 156 ++ncoaccess_; 157 if (access[s] && coaccess[s]) 158 ++nconnect_; 159 if (scc[s] >= nscc_) 160 nscc_ = scc[s] + 1; 161 } 162 } 163 164 LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT); 165 input_match_type_ = imatcher.Type(test_properties); 166 input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher; 167 168 LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT); 169 output_match_type_ = omatcher.Type(test_properties); 170 output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher; 171 } 172 173 // Short info 174 const string& FstType() const { return fst_type_; } 175 const string& ArcType() const { return A::Type(); } 176 const string& InputSymbols() const { return input_symbols_; } 177 const string& OutputSymbols() const { return output_symbols_; } 178 const bool LongInfo() const { return long_info_; } 179 const string& ArcFilterType() const { return arc_filter_type_; } 180 181 // Long info 182 MatchType InputMatchType() const { CheckLong(); return input_match_type_; } 183 MatchType OutputMatchType() const { CheckLong(); return output_match_type_; } 184 bool InputLookAhead() const { CheckLong(); return input_lookahead_; } 185 bool OutputLookAhead() const { CheckLong(); return output_lookahead_; } 186 int64 NumStates() const { CheckLong(); return nstates_; } 187 int64 NumArcs() const { CheckLong(); return narcs_; } 188 int64 Start() const { CheckLong(); return start_; } 189 int64 NumFinal() const { CheckLong(); return nfinal_; } 190 int64 NumEpsilons() const { CheckLong(); return nepsilons_; } 191 int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; } 192 int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; } 193 int64 NumAccessible() const { CheckLong(); return naccess_; } 194 int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; } 195 int64 NumConnected() const { CheckLong(); return nconnect_; } 196 int64 NumCc() const { CheckLong(); return ncc_; } 197 int64 NumScc() const { CheckLong(); return nscc_; } 198 uint64 Properties() const { CheckLong(); return properties_; } 199 200 private: 201 void CheckLong() const { 202 if (!long_info_) 203 FSTERROR() << "FstInfo: method only available with long info version"; 204 } 205 206 string fst_type_; 207 string input_symbols_; 208 string output_symbols_; 209 int64 nstates_; 210 int64 narcs_; 211 int64 start_; 212 int64 nfinal_; 213 int64 nepsilons_; 214 int64 niepsilons_; 215 int64 noepsilons_; 216 int64 naccess_; 217 int64 ncoaccess_; 218 int64 nconnect_; 219 int64 ncc_; 220 int64 nscc_; 221 MatchType input_match_type_; 222 MatchType output_match_type_; 223 bool input_lookahead_; 224 bool output_lookahead_; 225 uint64 properties_; 226 string arc_filter_type_; 227 bool long_info_; 228 DISALLOW_COPY_AND_ASSIGN(FstInfo); 229 }; 230 231 template <class A> 232 void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) { 233 ostream &os = pipe ? cerr : cout; 234 235 ios_base::fmtflags old = os.setf(ios::left); 236 os.width(50); 237 os << "fst type" << fstinfo.FstType() << endl; 238 os.width(50); 239 os << "arc type" << fstinfo.ArcType() << endl; 240 os.width(50); 241 os << "input symbol table" << fstinfo.InputSymbols() << endl; 242 os.width(50); 243 os << "output symbol table" << fstinfo.OutputSymbols() << endl; 244 245 if (!fstinfo.LongInfo()) { 246 os.setf(old); 247 return; 248 } 249 250 os.width(50); 251 os << "# of states" << fstinfo.NumStates() << endl; 252 os.width(50); 253 os << "# of arcs" << fstinfo.NumArcs() << endl; 254 os.width(50); 255 os << "initial state" << fstinfo.Start() << endl; 256 os.width(50); 257 os << "# of final states" << fstinfo.NumFinal() << endl; 258 os.width(50); 259 os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl; 260 os.width(50); 261 os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl; 262 os.width(50); 263 os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl; 264 os.width(50); 265 266 string arc_type = ""; 267 if (fstinfo.ArcFilterType() == "epsilon") 268 arc_type = "epsilon "; 269 else if (fstinfo.ArcFilterType() == "iepsilon") 270 arc_type = "input-epsilon "; 271 else if (fstinfo.ArcFilterType() == "oepsilon") 272 arc_type = "output-epsilon "; 273 274 string accessible_label = "# of " + arc_type + "accessible states"; 275 os.width(50); 276 os << accessible_label << fstinfo.NumAccessible() << endl; 277 string coaccessible_label = "# of " + arc_type + "coaccessible states"; 278 os.width(50); 279 os << coaccessible_label << fstinfo.NumCoAccessible() << endl; 280 string connected_label = "# of " + arc_type + "connected states"; 281 os.width(50); 282 os << connected_label << fstinfo.NumConnected() << endl; 283 string numcc_label = "# of " + arc_type + "connected components"; 284 os.width(50); 285 os << numcc_label << fstinfo.NumCc() << endl; 286 string numscc_label = "# of " + arc_type + "strongly conn components"; 287 os.width(50); 288 os << numscc_label << fstinfo.NumScc() << endl; 289 290 os.width(50); 291 os << "input matcher" 292 << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' : 293 fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl; 294 os.width(50); 295 os << "output matcher" 296 << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' : 297 fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl; 298 os.width(50); 299 os << "input lookahead" 300 << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl; 301 os.width(50); 302 os << "output lookahead" 303 << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl; 304 305 uint64 prop = 1; 306 for (int i = 0; i < 64; ++i, prop <<= 1) { 307 if (prop & kBinaryProperties) { 308 char value = 'n'; 309 if (fstinfo.Properties() & prop) value = 'y'; 310 os.width(50); 311 os << PropertyNames[i] << value << endl; 312 } else if (prop & kPosTrinaryProperties) { 313 char value = '?'; 314 if (fstinfo.Properties() & prop) value = 'y'; 315 else if (fstinfo.Properties() & prop << 1) value = 'n'; 316 os.width(50); 317 os << PropertyNames[i] << value << endl; 318 } 319 } 320 os.setf(old); 321 } 322 323 } // namespace fst 324 325 #endif // FST_SCRIPT_INFO_IMPL_H_ 326