1 // connect.h 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // 16 // \file 17 // Classes and functions to remove unsuccessful paths from an Fst. 18 19 #ifndef FST_LIB_CONNECT_H__ 20 #define FST_LIB_CONNECT_H__ 21 22 #include "fst/lib/mutable-fst.h" 23 24 namespace fst { 25 26 // Finds and returns strongly-connected components, accessible and 27 // coaccessible states and related properties. Uses Tarzan's single 28 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer 29 // Algorithms", 189pp). 30 template <class A> 31 class SccVisitor { 32 public: 33 typedef A Arc; 34 typedef typename Arc::Weight Weight; 35 typedef typename A::StateId StateId; 36 37 // scc[i]: strongly-connected component number for state i. 38 // SCC numbers will be in topological order for acyclic input. 39 // access[i]: accessibility of state i. 40 // coaccess[i]: coaccessibility of state i. 41 // Any of above can be NULL. 42 // props: related property bits (cyclicity, initial cyclicity, 43 // accessibiliity, coaccessiblity) set/cleared (o.w. unchanged). 44 SccVisitor(vector<StateId> *scc, vector<bool> *access, 45 vector<bool> *coaccess, uint64 *props) 46 : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {} 47 SccVisitor(uint64 *props) 48 : scc_(0), access_(0), coaccess_(0), props_(props) {} 49 50 void InitVisit(const Fst<A> &fst) { 51 if (scc_) 52 scc_->clear(); 53 if (access_) 54 access_->clear(); 55 if (coaccess_) { 56 coaccess_->clear(); 57 coaccess_internal_ = false; 58 } else { 59 coaccess_ = new vector<bool>; 60 coaccess_internal_ = true; 61 } 62 *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible; 63 *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible); 64 fst_ = &fst; 65 start_ = fst.Start(); 66 nstates_ = 0; 67 nscc_ = 0; 68 dfnumber_ = new vector<StateId>; 69 lowlink_ = new vector<StateId>; 70 onstack_ = new vector<bool>; 71 scc_stack_ = new vector<StateId>; 72 } 73 74 bool InitState(StateId s, StateId root) { 75 scc_stack_->push_back(s); 76 while ((StateId)dfnumber_->size() <= s) { 77 if (scc_) 78 scc_->push_back(-1); 79 if (access_) 80 access_->push_back(false); 81 coaccess_->push_back(false); 82 dfnumber_->push_back(-1); 83 lowlink_->push_back(-1); 84 onstack_->push_back(false); 85 } 86 (*dfnumber_)[s] = nstates_; 87 (*lowlink_)[s] = nstates_; 88 (*onstack_)[s] = true; 89 if (root == start_) { 90 if (access_) 91 (*access_)[s] = true; 92 } else { 93 if (access_) 94 (*access_)[s] = false; 95 *props_ |= kNotAccessible; 96 *props_ &= ~kAccessible; 97 } 98 ++nstates_; 99 return true; 100 } 101 102 bool TreeArc(StateId s, const A &arc) { return true; } 103 104 bool BackArc(StateId s, const A &arc) { 105 StateId t = arc.nextstate; 106 if ((*dfnumber_)[t] < (*lowlink_)[s]) 107 (*lowlink_)[s] = (*dfnumber_)[t]; 108 if ((*coaccess_)[t]) 109 (*coaccess_)[s] = true; 110 *props_ |= kCyclic; 111 *props_ &= ~kAcyclic; 112 if (arc.nextstate == start_) { 113 *props_ |= kInitialCyclic; 114 *props_ &= ~kInitialAcyclic; 115 } 116 return true; 117 } 118 119 bool ForwardOrCrossArc(StateId s, const A &arc) { 120 StateId t = arc.nextstate; 121 if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ && 122 (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s]) 123 (*lowlink_)[s] = (*dfnumber_)[t]; 124 if ((*coaccess_)[t]) 125 (*coaccess_)[s] = true; 126 return true; 127 } 128 129 void FinishState(StateId s, StateId p, const A *) { 130 if (fst_->Final(s) != Weight::Zero()) 131 (*coaccess_)[s] = true; 132 if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC 133 bool scc_coaccess = false; 134 size_t i = scc_stack_->size(); 135 StateId t; 136 do { 137 t = (*scc_stack_)[--i]; 138 if ((*coaccess_)[t]) 139 scc_coaccess = true; 140 } while (s != t); 141 do { 142 t = scc_stack_->back(); 143 if (scc_) 144 (*scc_)[t] = nscc_; 145 if (scc_coaccess) 146 (*coaccess_)[t] = true; 147 (*onstack_)[t] = false; 148 scc_stack_->pop_back(); 149 } while (s != t); 150 if (!scc_coaccess) { 151 *props_ |= kNotCoAccessible; 152 *props_ &= ~kCoAccessible; 153 } 154 ++nscc_; 155 } 156 if (p != kNoStateId) { 157 if ((*coaccess_)[s]) 158 (*coaccess_)[p] = true; 159 if ((*lowlink_)[s] < (*lowlink_)[p]) 160 (*lowlink_)[p] = (*lowlink_)[s]; 161 } 162 } 163 164 void FinishVisit() { 165 // Numbers SCC's in topological order when acyclic. 166 if (scc_) 167 for (StateId i = 0; i < (StateId)scc_->size(); ++i) 168 (*scc_)[i] = nscc_ - 1 - (*scc_)[i]; 169 if (coaccess_internal_) 170 delete coaccess_; 171 delete dfnumber_; 172 delete lowlink_; 173 delete onstack_; 174 delete scc_stack_; 175 } 176 177 private: 178 vector<StateId> *scc_; // State's scc number 179 vector<bool> *access_; // State's accessibility 180 vector<bool> *coaccess_; // State's coaccessibility 181 uint64 *props_; 182 const Fst<A> *fst_; 183 StateId start_; 184 StateId nstates_; // State count 185 StateId nscc_; // SCC count 186 bool coaccess_internal_; 187 vector<StateId> *dfnumber_; // state discovery times 188 vector<StateId> *lowlink_; // lowlink[s] == dfnumber[s] => SCC root 189 vector<bool> *onstack_; // is a state on the SCC stack 190 vector<StateId> *scc_stack_; // SCC stack (w/ random access) 191 }; 192 193 194 // Trims an FST, removing states and arcs that are not on successful 195 // paths. This version modifies its input. 196 // 197 // Complexity: 198 // - Time: O(V + E) 199 // - Space: O(V + E) 200 // where V = # of states and E = # of arcs. 201 template<class Arc> 202 void Connect(MutableFst<Arc> *fst) { 203 typedef typename Arc::StateId StateId; 204 205 vector<bool> access; 206 vector<bool> coaccess; 207 uint64 props = 0; 208 SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props); 209 DfsVisit(*fst, &scc_visitor); 210 vector<StateId> dstates; 211 for (StateId s = 0; s < (StateId)access.size(); ++s) 212 if (!access[s] || !coaccess[s]) 213 dstates.push_back(s); 214 fst->DeleteStates(dstates); 215 fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible); 216 } 217 218 } // namespace fst 219 220 #endif // FST_LIB_CONNECT_H__ 221