Home | History | Annotate | Download | only in script
      1 // info.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Class to compute various information about FSTs, helper class for fstinfo.cc
     20 
     21 #ifndef FST_SCRIPT_INFO_IMPL_H_
     22 #define FST_SCRIPT_INFO_IMPL_H_
     23 
     24 #include <string>
     25 #include <vector>
     26 using std::vector;
     27 
     28 #include <fst/connect.h>
     29 #include <fst/dfs-visit.h>
     30 #include <fst/fst.h>
     31 #include <fst/lookahead-matcher.h>
     32 #include <fst/matcher.h>
     33 #include <fst/queue.h>
     34 #include <fst/test-properties.h>
     35 #include <fst/verify.h>
     36 #include <fst/visit.h>
     37 
     38 namespace fst {
     39 
     40 // Compute various information about FSTs, helper class for fstinfo.cc.
     41 // WARNING: Stand-alone use of this class is not recommended, most code
     42 // should call directly the relevant library functions: Fst<A>::NumStates,
     43 // Fst<A>::NumArcs, TestProperties, ...
     44 template <class A> class FstInfo {
     45  public:
     46   typedef A Arc;
     47   typedef typename A::StateId StateId;
     48   typedef typename A::Label Label;
     49   typedef typename A::Weight Weight;
     50 
     51   // When info_type is "short" (or "auto" and not an ExpandedFst)
     52   // then only minimal info is computed and can be requested.
     53   FstInfo(const Fst<A> &fst, bool test_properties,
     54           const string &arc_filter_type = "any",
     55           string info_type = "auto", bool verify = true)
     56       : fst_type_(fst.Type()),
     57         input_symbols_(fst.InputSymbols() ?
     58                        fst.InputSymbols()->Name() : "none"),
     59         output_symbols_(fst.OutputSymbols() ?
     60                         fst.OutputSymbols()->Name() : "none"),
     61         nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
     62         nepsilons_(0), niepsilons_(0), noepsilons_(0),
     63         naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
     64         input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
     65         input_lookahead_(false), output_lookahead_(false),
     66         properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
     67     if (info_type == "long") {
     68       long_info_ = true;
     69     } else if (info_type == "short") {
     70       long_info_ = false;
     71     } else if (info_type == "auto") {
     72       long_info_ = fst.Properties(kExpanded, false);
     73     } else {
     74       FSTERROR() << "Bad info type: " << info_type;
     75       return;
     76     }
     77 
     78     if (!long_info_)
     79       return;
     80 
     81     // If the FST is not sane, we return.
     82     if (verify && !Verify(fst)) {
     83       FSTERROR() << "FstInfo: Verify: FST not well-formed.";
     84       return;
     85     }
     86 
     87     start_ = fst.Start();
     88     properties_ = fst.Properties(kFstProperties, test_properties);
     89 
     90     for (StateIterator< Fst<A> > siter(fst);
     91          !siter.Done();
     92          siter.Next()) {
     93       ++nstates_;
     94       StateId s = siter.Value();
     95       if (fst.Final(s) != Weight::Zero())
     96         ++nfinal_;
     97       for (ArcIterator< Fst<A> > aiter(fst, s);
     98            !aiter.Done();
     99            aiter.Next()) {
    100         const A &arc = aiter.Value();
    101         ++narcs_;
    102         if (arc.ilabel == 0 && arc.olabel == 0)
    103           ++nepsilons_;
    104         if (arc.ilabel == 0)
    105           ++niepsilons_;
    106         if (arc.olabel == 0)
    107           ++noepsilons_;
    108       }
    109     }
    110 
    111     {
    112       vector<StateId> cc;
    113       CcVisitor<Arc> cc_visitor(&cc);
    114       FifoQueue<StateId> fifo_queue;
    115       if (arc_filter_type == "any") {
    116         Visit(fst, &cc_visitor, &fifo_queue);
    117       } else if (arc_filter_type == "epsilon") {
    118         Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
    119       } else if (arc_filter_type == "iepsilon") {
    120         Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
    121       } else if (arc_filter_type == "oepsilon") {
    122         Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
    123       } else {
    124         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
    125         return;
    126       }
    127 
    128       for (StateId s = 0; s < cc.size(); ++s) {
    129         if (cc[s] >= ncc_)
    130           ncc_ = cc[s] + 1;
    131       }
    132     }
    133 
    134     {
    135       vector<StateId> scc;
    136       vector<bool> access, coaccess;
    137       uint64 props = 0;
    138       SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
    139       if (arc_filter_type == "any") {
    140         DfsVisit(fst, &scc_visitor);
    141       } else if (arc_filter_type == "epsilon") {
    142         DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
    143       } else if (arc_filter_type == "iepsilon") {
    144         DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
    145       } else if (arc_filter_type == "oepsilon") {
    146         DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
    147       } else {
    148         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
    149         return;
    150       }
    151 
    152       for (StateId s = 0; s < scc.size(); ++s) {
    153         if (access[s])
    154           ++naccess_;
    155         if (coaccess[s])
    156           ++ncoaccess_;
    157         if (access[s] && coaccess[s])
    158           ++nconnect_;
    159         if (scc[s] >= nscc_)
    160           nscc_ = scc[s] + 1;
    161       }
    162     }
    163 
    164     LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
    165     input_match_type_ =  imatcher.Type(test_properties);
    166     input_lookahead_ =  imatcher.Flags() & kInputLookAheadMatcher;
    167 
    168     LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
    169     output_match_type_ =  omatcher.Type(test_properties);
    170     output_lookahead_ =  omatcher.Flags() & kOutputLookAheadMatcher;
    171   }
    172 
    173   // Short info
    174   const string& FstType() const { return fst_type_; }
    175   const string& ArcType() const { return A::Type(); }
    176   const string& InputSymbols() const { return input_symbols_; }
    177   const string& OutputSymbols() const { return output_symbols_; }
    178   const bool LongInfo() const { return long_info_; }
    179   const string& ArcFilterType() const { return arc_filter_type_; }
    180 
    181   // Long info
    182   MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
    183   MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
    184   bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
    185   bool OutputLookAhead() const { CheckLong();  return output_lookahead_; }
    186   int64 NumStates() const { CheckLong();  return nstates_; }
    187   int64 NumArcs() const { CheckLong();  return narcs_; }
    188   int64 Start() const { CheckLong();  return start_; }
    189   int64 NumFinal() const { CheckLong();  return nfinal_; }
    190   int64 NumEpsilons() const { CheckLong();  return nepsilons_; }
    191   int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
    192   int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
    193   int64 NumAccessible() const { CheckLong(); return naccess_; }
    194   int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
    195   int64 NumConnected() const { CheckLong(); return nconnect_; }
    196   int64 NumCc() const { CheckLong(); return ncc_; }
    197   int64 NumScc() const { CheckLong(); return nscc_; }
    198   uint64 Properties() const { CheckLong(); return properties_; }
    199 
    200  private:
    201   void CheckLong() const {
    202     if (!long_info_)
    203       FSTERROR() << "FstInfo: method only available with long info version";
    204   }
    205 
    206   string fst_type_;
    207   string input_symbols_;
    208   string output_symbols_;
    209   int64 nstates_;
    210   int64 narcs_;
    211   int64 start_;
    212   int64 nfinal_;
    213   int64 nepsilons_;
    214   int64 niepsilons_;
    215   int64 noepsilons_;
    216   int64 naccess_;
    217   int64 ncoaccess_;
    218   int64 nconnect_;
    219   int64 ncc_;
    220   int64 nscc_;
    221   MatchType input_match_type_;
    222   MatchType output_match_type_;
    223   bool input_lookahead_;
    224   bool output_lookahead_;
    225   uint64 properties_;
    226   string arc_filter_type_;
    227   bool long_info_;
    228   DISALLOW_COPY_AND_ASSIGN(FstInfo);
    229 };
    230 
    231 template <class A>
    232 void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
    233   ostream &os = pipe ? cerr : cout;
    234 
    235   ios_base::fmtflags old = os.setf(ios::left);
    236   os.width(50);
    237   os << "fst type" <<  fstinfo.FstType() << endl;
    238   os.width(50);
    239   os << "arc type" << fstinfo.ArcType() << endl;
    240   os.width(50);
    241   os << "input symbol table" << fstinfo.InputSymbols() << endl;
    242   os.width(50);
    243   os << "output symbol table" << fstinfo.OutputSymbols() << endl;
    244 
    245   if (!fstinfo.LongInfo()) {
    246     os.setf(old);
    247     return;
    248   }
    249 
    250   os.width(50);
    251   os << "# of states" << fstinfo.NumStates() << endl;
    252   os.width(50);
    253   os << "# of arcs" << fstinfo.NumArcs() << endl;
    254   os.width(50);
    255   os << "initial state" << fstinfo.Start() << endl;
    256   os.width(50);
    257   os << "# of final states" << fstinfo.NumFinal() << endl;
    258   os.width(50);
    259   os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
    260   os.width(50);
    261   os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
    262   os.width(50);
    263   os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
    264   os.width(50);
    265 
    266   string arc_type = "";
    267   if (fstinfo.ArcFilterType() == "epsilon")
    268     arc_type = "epsilon ";
    269   else if (fstinfo.ArcFilterType() == "iepsilon")
    270     arc_type = "input-epsilon ";
    271   else if (fstinfo.ArcFilterType() == "oepsilon")
    272     arc_type = "output-epsilon ";
    273 
    274   string accessible_label = "# of " +  arc_type + "accessible states";
    275   os.width(50);
    276   os << accessible_label << fstinfo.NumAccessible() << endl;
    277   string coaccessible_label = "# of " +  arc_type + "coaccessible states";
    278   os.width(50);
    279   os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
    280   string connected_label = "# of " +  arc_type + "connected states";
    281   os.width(50);
    282   os << connected_label << fstinfo.NumConnected() << endl;
    283   string numcc_label = "# of " +  arc_type + "connected components";
    284   os.width(50);
    285   os << numcc_label << fstinfo.NumCc() << endl;
    286   string numscc_label = "# of " +  arc_type + "strongly conn components";
    287   os.width(50);
    288   os << numscc_label << fstinfo.NumScc() << endl;
    289 
    290   os.width(50);
    291   os << "input matcher"
    292      << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
    293          fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
    294   os.width(50);
    295   os << "output matcher"
    296      << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
    297          fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
    298   os.width(50);
    299   os << "input lookahead"
    300      << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
    301   os.width(50);
    302   os << "output lookahead"
    303      << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
    304 
    305   uint64 prop = 1;
    306   for (int i = 0; i < 64; ++i, prop <<= 1) {
    307     if (prop & kBinaryProperties) {
    308       char value = 'n';
    309       if (fstinfo.Properties() & prop) value = 'y';
    310       os.width(50);
    311       os << PropertyNames[i] << value << endl;
    312     } else if (prop & kPosTrinaryProperties) {
    313       char value = '?';
    314       if (fstinfo.Properties() & prop) value = 'y';
    315       else if (fstinfo.Properties() & prop << 1) value = 'n';
    316       os.width(50);
    317       os << PropertyNames[i] << value << endl;
    318     }
    319   }
    320   os.setf(old);
    321 }
    322 
    323 }  // namespace fst
    324 
    325 #endif  // FST_SCRIPT_INFO_IMPL_H_
    326