1 // equivalent.h 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // 16 // \file Functions and classes to determine the equivalence of two 17 // FSTs. 18 19 #ifndef FST_LIB_EQUIVALENT_H__ 20 #define FST_LIB_EQUIVALENT_H__ 21 22 #include <algorithm> 23 24 #include <unordered_map> 25 26 #include "fst/lib/encode.h" 27 #include "fst/lib/push.h" 28 #include "fst/lib/union-find.h" 29 #include "fst/lib/vector-fst.h" 30 31 namespace fst { 32 33 // Traits-like struct holding utility functions/typedefs/constants for 34 // the equivalence algorithm. 35 // 36 // Encoding device: in order to make the statesets of the two acceptors 37 // disjoint, we map Arc::StateId on the type MappedId. The states of 38 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and 39 // those of the second one on even numbers (s -> 2s + 2). The number 0 40 // is reserved for an implicit (non-final) 'dead state' (required for 41 // the correct treatment of non-coaccessible states; kNoStateId is 42 // mapped to kDeadState for both acceptors). The union-find algorithm 43 // operates on the mapped IDs. 44 template <class Arc> 45 struct EquivalenceUtil { 46 typedef typename Arc::StateId StateId; 47 typedef typename Arc::Weight Weight; 48 typedef int32 MappedId; // ID for an equivalence class. 49 50 // MappedId for an implicit dead state. 51 static const MappedId kDeadState = 0; 52 53 // MappedId for lookup failure. 54 static const MappedId kInvalidId = -1; 55 56 // Maps state ID to the representative of the corresponding 57 // equivalence class. The parameter 'which_fst' takes the values 1 58 // and 2, identifying the input FST. 59 static MappedId MapState(StateId s, int32 which_fst) { 60 return 61 (kNoStateId == s) 62 ? 63 kDeadState 64 : 65 (static_cast<MappedId>(s) << 1) + which_fst; 66 } 67 // Maps set ID to State ID. 68 static StateId UnMapState(MappedId id) { 69 return static_cast<StateId>((--id) >> 1); 70 } 71 // Convenience function: checks if state with MappedId 's' is final 72 // in acceptor 'fa'. 73 static bool IsFinal(const Fst<Arc> &fa, MappedId s) { 74 return 75 (kDeadState == s) ? 76 false : (fa.Final(UnMapState(s)) != Weight::Zero()); 77 } 78 // Convenience function: returns the representative of 'id' in 'sets', 79 // creating a new set if needed. 80 static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { 81 MappedId repr = sets->FindSet(id); 82 if (repr != kInvalidId) { 83 return repr; 84 } else { 85 sets->MakeSet(id); 86 return id; 87 } 88 } 89 }; 90 91 // Equivalence checking algorithm: determines if the two FSTs 92 // <code>fst1</code> and <code>fst2</code> are equivalent. The input 93 // FSTs must be deterministic input-side epsilon-free acceptors, 94 // unweighted or with weights over a left semiring. Two acceptors are 95 // considered equivalent if they accept exactly the same set of 96 // strings (with the same weights). 97 // 98 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and 99 // Analysis of Computer Programs") successively constructs sets of 100 // states that can be reached by the same prefixes, starting with a 101 // set containing the start states of both acceptors. A disjoint tree 102 // forest (the union-find algorithm) is used to represent the sets of 103 // states. The algorithm returns 'false' if one of the constructed 104 // sets contains both final and non-final states. 105 // 106 // Complexity: quasi-linear, i.e. O(n G(n)), where 107 // n = |S1| + |S2| is the number of states in both acceptors 108 // G(n) is a very slowly growing function that can be approximated 109 // by 4 by all practical purposes. 110 // 111 template <class Arc> 112 bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2) { 113 typedef typename Arc::Weight Weight; 114 // Check properties first: 115 uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; 116 if (fst1.Properties(props, true) != props) { 117 LOG(FATAL) << "Equivalent: first argument not an" 118 << " epsilon-free deterministic acceptor"; 119 } 120 if (fst2.Properties(props, true) != props) { 121 LOG(FATAL) << "Equivalent: second argument not an" 122 << " epsilon-free deterministic acceptor"; 123 } 124 125 if ((fst1.Properties(kUnweighted , true) != kUnweighted) 126 || (fst2.Properties(kUnweighted , true) != kUnweighted)) { 127 VectorFst<Arc> efst1(fst1); 128 VectorFst<Arc> efst2(fst2); 129 Push(&efst1, REWEIGHT_TO_INITIAL); 130 Push(&efst2, REWEIGHT_TO_INITIAL); 131 Map(&efst1, QuantizeMapper<Arc>()); 132 Map(&efst2, QuantizeMapper<Arc>()); 133 EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); 134 Map(&efst1, &mapper); 135 Map(&efst2, &mapper); 136 return Equivalent(efst1, efst2); 137 } 138 139 // Convenience typedefs: 140 typedef typename Arc::StateId StateId; 141 typedef EquivalenceUtil<Arc> Util; 142 typedef typename Util::MappedId MappedId; 143 enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) 144 145 MappedId s1 = Util::MapState(fst1.Start(), FST1); 146 MappedId s2 = Util::MapState(fst2.Start(), FST2); 147 148 // The union-find structure. 149 UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); 150 151 // Initialize the union-find structure. 152 eq_classes.MakeSet(s1); 153 eq_classes.MakeSet(s2); 154 155 // Early return if the start states differ w.r.t. being final. 156 if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { 157 return false; 158 } 159 // Data structure for the (partial) acceptor transition function of 160 // fst1 and fst2: input labels mapped to pairs of MappedId's 161 // representing destination states of the corresponding arcs in fst1 162 // and fst2, respectively. 163 typedef 164 std::unordered_map<typename Arc::Label, pair<MappedId, MappedId> > 165 Label2StatePairMap; 166 167 Label2StatePairMap arc_pairs; 168 169 // Pairs of MappedId's to be processed, organized in a queue. 170 deque<pair<MappedId, MappedId> > q; 171 172 // Main loop: explores the two acceptors in a breadth-first manner, 173 // updating the equivalence relation on the statesets. Loop 174 // invariant: each block of states contains either final states only 175 // or non-final states only. 176 for (q.push_back(make_pair(s1, s2)); !q.empty(); q.pop_front()) { 177 s1 = q.front().first; 178 s2 = q.front().second; 179 180 // Representatives of the equivalence classes of s1/s2. 181 MappedId rep1 = Util::FindSet(&eq_classes, s1); 182 MappedId rep2 = Util::FindSet(&eq_classes, s2); 183 184 if (rep1 != rep2) { 185 eq_classes.Union(rep1, rep2); 186 arc_pairs.clear(); 187 188 // Copy outgoing arcs starting at s1 into the hashtable. 189 if (Util::kDeadState != s1) { 190 ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); 191 for (; !arc_iter.Done(); arc_iter.Next()) { 192 const Arc &arc = arc_iter.Value(); 193 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 194 // are treated as 195 // non-exisitent. 196 arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); 197 } 198 } 199 } 200 // Copy outgoing arcs starting at s2 into the hashtable. 201 if (Util::kDeadState != s2) { 202 ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); 203 for (; !arc_iter.Done(); arc_iter.Next()) { 204 const Arc &arc = arc_iter.Value(); 205 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 206 // are treated as 207 // non-existent. 208 arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); 209 } 210 } 211 } 212 // Iterate through the hashtable and process pairs of target 213 // states. 214 for (typename Label2StatePairMap::const_iterator 215 arc_iter = arc_pairs.begin(); 216 arc_iter != arc_pairs.end(); 217 ++arc_iter) { 218 const pair<MappedId, MappedId> &p = arc_iter->second; 219 if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { 220 // Detected inconsistency: return false. 221 return false; 222 } 223 q.push_back(p); 224 } 225 } 226 } 227 return true; 228 } 229 230 } // namespace fst 231 232 #endif // FST_LIB_EQUIVALENT_H__ 233