Home | History | Annotate | Download | only in pdt
      1 // info.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Prints information about a PDT.
     20 
     21 #ifndef FST_EXTENSIONS_PDT_INFO_H__
     22 #define FST_EXTENSIONS_PDT_INFO_H__
     23 
     24 #include <unordered_map>
     25 using std::tr1::unordered_map;
     26 using std::tr1::unordered_multimap;
     27 #include <tr1/unordered_set>
     28 using std::tr1::unordered_set;
     29 using std::tr1::unordered_multiset;
     30 #include <vector>
     31 using std::vector;
     32 
     33 #include <fst/fst.h>
     34 #include <fst/extensions/pdt/pdt.h>
     35 
     36 namespace fst {
     37 
     38 // Compute various information about PDTs, helper class for pdtinfo.cc.
     39 template <class A> class PdtInfo {
     40 public:
     41   typedef A Arc;
     42   typedef typename A::StateId StateId;
     43   typedef typename A::Label Label;
     44   typedef typename A::Weight Weight;
     45 
     46   PdtInfo(const Fst<A> &fst,
     47           const vector<pair<typename A::Label,
     48           typename A::Label> > &parens);
     49 
     50   const string& FstType() const { return fst_type_; }
     51   const string& ArcType() const { return A::Type(); }
     52 
     53   int64 NumStates() const { return nstates_; }
     54   int64 NumArcs() const { return narcs_; }
     55   int64 NumOpenParens() const { return nopen_parens_; }
     56   int64 NumCloseParens() const { return nclose_parens_; }
     57   int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
     58   int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
     59   int64 NumOpenParenStates() const { return nopen_paren_states_; }
     60   int64 NumCloseParenStates() const { return nclose_paren_states_; }
     61 
     62  private:
     63   string fst_type_;
     64   int64 nstates_;
     65   int64 narcs_;
     66   int64 nopen_parens_;
     67   int64 nclose_parens_;
     68   int64 nuniq_open_parens_;
     69   int64 nuniq_close_parens_;
     70   int64 nopen_paren_states_;
     71   int64 nclose_paren_states_;
     72 
     73   DISALLOW_COPY_AND_ASSIGN(PdtInfo);
     74 };
     75 
     76 template <class A>
     77 PdtInfo<A>::PdtInfo(const Fst<A> &fst,
     78                  const vector<pair<typename A::Label,
     79                                    typename A::Label> > &parens)
     80   : fst_type_(fst.Type()),
     81     nstates_(0),
     82     narcs_(0),
     83     nopen_parens_(0),
     84     nclose_parens_(0),
     85     nuniq_open_parens_(0),
     86     nuniq_close_parens_(0),
     87     nopen_paren_states_(0),
     88     nclose_paren_states_(0) {
     89   unordered_map<Label, size_t> paren_map;
     90   unordered_set<Label> paren_set;
     91   unordered_set<StateId> open_paren_state_set;
     92   unordered_set<StateId> close_paren_state_set;
     93 
     94   for (size_t i = 0; i < parens.size(); ++i) {
     95     const pair<Label, Label>  &p = parens[i];
     96     paren_map[p.first] = i;
     97     paren_map[p.second] = i;
     98   }
     99 
    100   for (StateIterator< Fst<A> > siter(fst);
    101        !siter.Done();
    102        siter.Next()) {
    103     ++nstates_;
    104     StateId s = siter.Value();
    105     for (ArcIterator< Fst<A> > aiter(fst, s);
    106          !aiter.Done();
    107          aiter.Next()) {
    108       const A &arc = aiter.Value();
    109       ++narcs_;
    110       typename unordered_map<Label, size_t>::const_iterator pit
    111         = paren_map.find(arc.ilabel);
    112       if (pit != paren_map.end()) {
    113         Label open_paren =  parens[pit->second].first;
    114         Label close_paren =  parens[pit->second].second;
    115         if (arc.ilabel == open_paren) {
    116           ++nopen_parens_;
    117           if (!paren_set.count(open_paren)) {
    118             ++nuniq_open_parens_;
    119             paren_set.insert(open_paren);
    120           }
    121           if (!open_paren_state_set.count(arc.nextstate)) {
    122             ++nopen_paren_states_;
    123             open_paren_state_set.insert(arc.nextstate);
    124           }
    125         } else {
    126           ++nclose_parens_;
    127           if (!paren_set.count(close_paren)) {
    128             ++nuniq_close_parens_;
    129             paren_set.insert(close_paren);
    130           }
    131           if (!close_paren_state_set.count(s)) {
    132             ++nclose_paren_states_;
    133             close_paren_state_set.insert(s);
    134           }
    135 
    136         }
    137       }
    138     }
    139   }
    140 }
    141 
    142 
    143 template <class A>
    144 void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
    145   ios_base::fmtflags old = cout.setf(ios::left);
    146   cout.width(50);
    147   cout << "fst type" << pdtinfo.FstType().c_str() << endl;
    148   cout.width(50);
    149   cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
    150   cout.width(50);
    151   cout << "# of states" << pdtinfo.NumStates() << endl;
    152   cout.width(50);
    153   cout << "# of arcs" << pdtinfo.NumArcs() << endl;
    154   cout.width(50);
    155   cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
    156   cout.width(50);
    157   cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
    158   cout.width(50);
    159   cout << "# of unique open parentheses"
    160        << pdtinfo.NumUniqueOpenParens() << endl;
    161   cout.width(50);
    162   cout << "# of unique close parentheses"
    163        << pdtinfo.NumUniqueCloseParens() << endl;
    164   cout.width(50);
    165   cout << "# of open parenthesis dest. states"
    166        << pdtinfo.NumOpenParenStates() << endl;
    167   cout.width(50);
    168   cout << "# of close parenthesis source states"
    169        << pdtinfo.NumCloseParenStates() << endl;
    170   cout.setf(old);
    171 }
    172 
    173 }  // namespace fst
    174 
    175 #endif  // FST_EXTENSIONS_PDT_INFO_H__
    176