Home | History | Annotate | Download | only in lib
      1 // connect.h
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 //
     16 // \file
     17 // Classes and functions to remove unsuccessful paths from an Fst.
     18 
     19 #ifndef FST_LIB_CONNECT_H__
     20 #define FST_LIB_CONNECT_H__
     21 
     22 #include "fst/lib/mutable-fst.h"
     23 
     24 namespace fst {
     25 
     26 // Finds and returns strongly-connected components, accessible and
     27 // coaccessible states and related properties. Uses Tarzan's single
     28 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
     29 // Algorithms", 189pp).
     30 template <class A>
     31 class SccVisitor {
     32  public:
     33   typedef A Arc;
     34   typedef typename Arc::Weight Weight;
     35   typedef typename A::StateId StateId;
     36 
     37   // scc[i]: strongly-connected component number for state i.
     38   //   SCC numbers will be in topological order for acyclic input.
     39   // access[i]: accessibility of state i.
     40   // coaccess[i]: coaccessibility of state i.
     41   // Any of above can be NULL.
     42   // props: related property bits (cyclicity, initial cyclicity,
     43   //   accessibiliity, coaccessiblity) set/cleared (o.w. unchanged).
     44   SccVisitor(vector<StateId> *scc, vector<bool> *access,
     45              vector<bool> *coaccess, uint64 *props)
     46       : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
     47   SccVisitor(uint64 *props)
     48       : scc_(0), access_(0), coaccess_(0), props_(props) {}
     49 
     50   void InitVisit(const Fst<A> &fst) {
     51     if (scc_)
     52       scc_->clear();
     53     if (access_)
     54       access_->clear();
     55     if (coaccess_) {
     56       coaccess_->clear();
     57       coaccess_internal_ = false;
     58     } else {
     59       coaccess_ = new vector<bool>;
     60       coaccess_internal_ = true;
     61     }
     62     *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
     63     *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
     64     fst_ = &fst;
     65     start_ = fst.Start();
     66     nstates_ = 0;
     67     nscc_ = 0;
     68     dfnumber_ = new vector<StateId>;
     69     lowlink_ = new vector<StateId>;
     70     onstack_ = new vector<bool>;
     71     scc_stack_ = new vector<StateId>;
     72   }
     73 
     74   bool InitState(StateId s, StateId root) {
     75     scc_stack_->push_back(s);
     76     while ((StateId)dfnumber_->size() <= s) {
     77       if (scc_)
     78         scc_->push_back(-1);
     79       if (access_)
     80         access_->push_back(false);
     81       coaccess_->push_back(false);
     82       dfnumber_->push_back(-1);
     83       lowlink_->push_back(-1);
     84       onstack_->push_back(false);
     85     }
     86     (*dfnumber_)[s] = nstates_;
     87     (*lowlink_)[s] = nstates_;
     88     (*onstack_)[s] = true;
     89     if (root == start_) {
     90       if (access_)
     91         (*access_)[s] = true;
     92     } else {
     93       if (access_)
     94         (*access_)[s] = false;
     95       *props_ |= kNotAccessible;
     96       *props_ &= ~kAccessible;
     97     }
     98     ++nstates_;
     99     return true;
    100   }
    101 
    102   bool TreeArc(StateId s, const A &arc) { return true; }
    103 
    104   bool BackArc(StateId s, const A &arc) {
    105     StateId t = arc.nextstate;
    106     if ((*dfnumber_)[t] < (*lowlink_)[s])
    107       (*lowlink_)[s] = (*dfnumber_)[t];
    108     if ((*coaccess_)[t])
    109       (*coaccess_)[s] = true;
    110     *props_ |= kCyclic;
    111     *props_ &= ~kAcyclic;
    112     if (arc.nextstate == start_) {
    113       *props_ |= kInitialCyclic;
    114       *props_ &= ~kInitialAcyclic;
    115     }
    116     return true;
    117   }
    118 
    119   bool ForwardOrCrossArc(StateId s, const A &arc) {
    120     StateId t = arc.nextstate;
    121     if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
    122         (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
    123       (*lowlink_)[s] = (*dfnumber_)[t];
    124     if ((*coaccess_)[t])
    125       (*coaccess_)[s] = true;
    126     return true;
    127   }
    128 
    129   void FinishState(StateId s, StateId p, const A *) {
    130     if (fst_->Final(s) != Weight::Zero())
    131       (*coaccess_)[s] = true;
    132     if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
    133       bool scc_coaccess = false;
    134       size_t i = scc_stack_->size();
    135       StateId t;
    136       do {
    137         t = (*scc_stack_)[--i];
    138         if ((*coaccess_)[t])
    139           scc_coaccess = true;
    140       } while (s != t);
    141       do {
    142         t = scc_stack_->back();
    143         if (scc_)
    144           (*scc_)[t] = nscc_;
    145         if (scc_coaccess)
    146           (*coaccess_)[t] = true;
    147         (*onstack_)[t] = false;
    148         scc_stack_->pop_back();
    149       } while (s != t);
    150       if (!scc_coaccess) {
    151         *props_ |= kNotCoAccessible;
    152         *props_ &= ~kCoAccessible;
    153       }
    154       ++nscc_;
    155     }
    156     if (p != kNoStateId) {
    157       if ((*coaccess_)[s])
    158         (*coaccess_)[p] = true;
    159       if ((*lowlink_)[s] < (*lowlink_)[p])
    160         (*lowlink_)[p] = (*lowlink_)[s];
    161     }
    162   }
    163 
    164   void FinishVisit() {
    165     // Numbers SCC's in topological order when acyclic.
    166     if (scc_)
    167       for (StateId i = 0; i < (StateId)scc_->size(); ++i)
    168         (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
    169     if (coaccess_internal_)
    170       delete coaccess_;
    171     delete dfnumber_;
    172     delete lowlink_;
    173     delete onstack_;
    174     delete scc_stack_;
    175   }
    176 
    177  private:
    178   vector<StateId> *scc_;        // State's scc number
    179   vector<bool> *access_;        // State's accessibility
    180   vector<bool> *coaccess_;      // State's coaccessibility
    181   uint64 *props_;
    182   const Fst<A> *fst_;
    183   StateId start_;
    184   StateId nstates_;             // State count
    185   StateId nscc_;                // SCC count
    186   bool coaccess_internal_;
    187   vector<StateId> *dfnumber_;   // state discovery times
    188   vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
    189   vector<bool> *onstack_;       // is a state on the SCC stack
    190   vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
    191 };
    192 
    193 
    194 // Trims an FST, removing states and arcs that are not on successful
    195 // paths. This version modifies its input.
    196 //
    197 // Complexity:
    198 // - Time:  O(V + E)
    199 // - Space: O(V + E)
    200 // where V = # of states and E = # of arcs.
    201 template<class Arc>
    202 void Connect(MutableFst<Arc> *fst) {
    203   typedef typename Arc::StateId StateId;
    204 
    205   vector<bool> access;
    206   vector<bool> coaccess;
    207   uint64 props = 0;
    208   SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
    209   DfsVisit(*fst, &scc_visitor);
    210   vector<StateId> dstates;
    211   for (StateId s = 0; s < (StateId)access.size(); ++s)
    212     if (!access[s] || !coaccess[s])
    213       dstates.push_back(s);
    214   fst->DeleteStates(dstates);
    215   fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
    216 }
    217 
    218 }  // namespace fst
    219 
    220 #endif  // FST_LIB_CONNECT_H__
    221