Home | History | Annotate | Download | only in fst
      1 // equivalent.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: wojciech (at) google.com (Wojciech Skut)
     17 //
     18 // \file Functions and classes to determine the equivalence of two
     19 // FSTs.
     20 
     21 #ifndef FST_LIB_EQUIVALENT_H__
     22 #define FST_LIB_EQUIVALENT_H__
     23 
     24 #include <algorithm>
     25 #include <deque>
     26 using std::deque;
     27 #include <tr1/unordered_map>
     28 using std::tr1::unordered_map;
     29 using std::tr1::unordered_multimap;
     30 #include <utility>
     31 using std::pair; using std::make_pair;
     32 #include <vector>
     33 using std::vector;
     34 
     35 #include <fst/encode.h>
     36 #include <fst/push.h>
     37 #include <fst/union-find.h>
     38 #include <fst/vector-fst.h>
     39 
     40 
     41 namespace fst {
     42 
     43 // Traits-like struct holding utility functions/typedefs/constants for
     44 // the equivalence algorithm.
     45 //
     46 // Encoding device: in order to make the statesets of the two acceptors
     47 // disjoint, we map Arc::StateId on the type MappedId. The states of
     48 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
     49 // those of the second one on even numbers (s -> 2s + 2). The number 0
     50 // is reserved for an implicit (non-final) 'dead state' (required for
     51 // the correct treatment of non-coaccessible states; kNoStateId is
     52 // mapped to kDeadState for both acceptors). The union-find algorithm
     53 // operates on the mapped IDs.
     54 template <class Arc>
     55 struct EquivalenceUtil {
     56   typedef typename Arc::StateId StateId;
     57   typedef typename Arc::Weight Weight;
     58   typedef StateId MappedId;  // ID for an equivalence class.
     59 
     60   // MappedId for an implicit dead state.
     61   static const MappedId kDeadState = 0;
     62 
     63   // MappedId for lookup failure.
     64   static const MappedId kInvalidId = -1;
     65 
     66   // Maps state ID to the representative of the corresponding
     67   // equivalence class. The parameter 'which_fst' takes the values 1
     68   // and 2, identifying the input FST.
     69   static MappedId MapState(StateId s, int32 which_fst) {
     70     return
     71       (kNoStateId == s)
     72       ?
     73       kDeadState
     74       :
     75       (static_cast<MappedId>(s) << 1) + which_fst;
     76   }
     77   // Maps set ID to State ID.
     78   static StateId UnMapState(MappedId id) {
     79     return static_cast<StateId>((--id) >> 1);
     80   }
     81   // Convenience function: checks if state with MappedId 's' is final
     82   // in acceptor 'fa'.
     83   static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
     84     return
     85       (kDeadState == s) ?
     86       false : (fa.Final(UnMapState(s)) != Weight::Zero());
     87   }
     88   // Convenience function: returns the representative of 'id' in 'sets',
     89   // creating a new set if needed.
     90   static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
     91     MappedId repr = sets->FindSet(id);
     92     if (repr != kInvalidId) {
     93       return repr;
     94     } else {
     95       sets->MakeSet(id);
     96       return id;
     97     }
     98   }
     99 };
    100 
    101 template <class Arc> const
    102 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState;
    103 
    104 template <class Arc> const
    105 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId;
    106 
    107 
    108 // Equivalence checking algorithm: determines if the two FSTs
    109 // <code>fst1</code> and <code>fst2</code> are equivalent. The input
    110 // FSTs must be deterministic input-side epsilon-free acceptors,
    111 // unweighted or with weights over a left semiring. Two acceptors are
    112 // considered equivalent if they accept exactly the same set of
    113 // strings (with the same weights).
    114 //
    115 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
    116 // Analysis of Computer Programs") successively constructs sets of
    117 // states that can be reached by the same prefixes, starting with a
    118 // set containing the start states of both acceptors. A disjoint tree
    119 // forest (the union-find algorithm) is used to represent the sets of
    120 // states. The algorithm returns 'false' if one of the constructed
    121 // sets contains both final and non-final states. Returns optional error
    122 // value (when FLAGS_error_fatal = false).
    123 //
    124 // Complexity: quasi-linear, i.e. O(n G(n)), where
    125 //   n = |S1| + |S2| is the number of states in both acceptors
    126 //   G(n) is a very slowly growing function that can be approximated
    127 //        by 4 by all practical purposes.
    128 //
    129 template <class Arc>
    130 bool Equivalent(const Fst<Arc> &fst1,
    131                 const Fst<Arc> &fst2,
    132                 double delta = kDelta, bool *error = 0) {
    133   typedef typename Arc::Weight Weight;
    134   if (error) *error = false;
    135 
    136   // Check that the symbol table are compatible
    137   if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
    138       !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
    139     FSTERROR() << "Equivalent: input/output symbol tables of 1st argument "
    140                << "do not match input/output symbol tables of 2nd argument";
    141     if (error) *error = true;
    142     return false;
    143   }
    144   // Check properties first:
    145   uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
    146   if (fst1.Properties(props, true) != props) {
    147     FSTERROR() << "Equivalent: first argument not an"
    148                << " epsilon-free deterministic acceptor";
    149     if (error) *error = true;
    150     return false;
    151   }
    152   if (fst2.Properties(props, true) != props) {
    153     FSTERROR() << "Equivalent: second argument not an"
    154                << " epsilon-free deterministic acceptor";
    155     if (error) *error = true;
    156     return false;
    157   }
    158 
    159   if ((fst1.Properties(kUnweighted , true) != kUnweighted)
    160       || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
    161     VectorFst<Arc> efst1(fst1);
    162     VectorFst<Arc> efst2(fst2);
    163     Push(&efst1, REWEIGHT_TO_INITIAL, delta);
    164     Push(&efst2, REWEIGHT_TO_INITIAL, delta);
    165     ArcMap(&efst1, QuantizeMapper<Arc>(delta));
    166     ArcMap(&efst2, QuantizeMapper<Arc>(delta));
    167     EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
    168     ArcMap(&efst1, &mapper);
    169     ArcMap(&efst2, &mapper);
    170     return Equivalent(efst1, efst2);
    171   }
    172 
    173   // Convenience typedefs:
    174   typedef typename Arc::StateId StateId;
    175   typedef EquivalenceUtil<Arc> Util;
    176   typedef typename Util::MappedId MappedId;
    177   enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
    178 
    179   MappedId s1 = Util::MapState(fst1.Start(), FST1);
    180   MappedId s2 = Util::MapState(fst2.Start(), FST2);
    181 
    182   // The union-find structure.
    183   UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
    184 
    185   // Initialize the union-find structure.
    186   eq_classes.MakeSet(s1);
    187   eq_classes.MakeSet(s2);
    188 
    189   // Data structure for the (partial) acceptor transition function of
    190   // fst1 and fst2: input labels mapped to pairs of MappedId's
    191   // representing destination states of the corresponding arcs in fst1
    192   // and fst2, respectively.
    193   typedef
    194     unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
    195     Label2StatePairMap;
    196 
    197   Label2StatePairMap arc_pairs;
    198 
    199   // Pairs of MappedId's to be processed, organized in a queue.
    200   deque<pair<MappedId, MappedId> > q;
    201 
    202   bool ret = true;
    203   // Early return if the start states differ w.r.t. being final.
    204   if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
    205     ret = false;
    206   }
    207 
    208   // Main loop: explores the two acceptors in a breadth-first manner,
    209   // updating the equivalence relation on the statesets. Loop
    210   // invariant: each block of states contains either final states only
    211   // or non-final states only.
    212   for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
    213     s1 = q.front().first;
    214     s2 = q.front().second;
    215 
    216     // Representatives of the equivalence classes of s1/s2.
    217     MappedId rep1 = Util::FindSet(&eq_classes, s1);
    218     MappedId rep2 = Util::FindSet(&eq_classes, s2);
    219 
    220     if (rep1 != rep2) {
    221       eq_classes.Union(rep1, rep2);
    222       arc_pairs.clear();
    223 
    224       // Copy outgoing arcs starting at s1 into the hashtable.
    225       if (Util::kDeadState != s1) {
    226         ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
    227         for (; !arc_iter.Done(); arc_iter.Next()) {
    228           const Arc &arc = arc_iter.Value();
    229           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
    230                                                    // are treated as
    231                                                    // non-exisitent.
    232             arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
    233           }
    234         }
    235       }
    236       // Copy outgoing arcs starting at s2 into the hashtable.
    237       if (Util::kDeadState != s2) {
    238         ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
    239         for (; !arc_iter.Done(); arc_iter.Next()) {
    240           const Arc &arc = arc_iter.Value();
    241           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
    242                                                    // are treated as
    243                                                    // non-existent.
    244             arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
    245           }
    246         }
    247       }
    248       // Iterate through the hashtable and process pairs of target
    249       // states.
    250       for (typename Label2StatePairMap::const_iterator
    251              arc_iter = arc_pairs.begin();
    252            arc_iter != arc_pairs.end();
    253            ++arc_iter) {
    254         const pair<MappedId, MappedId> &p = arc_iter->second;
    255         if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
    256           // Detected inconsistency: return false.
    257           ret = false;
    258           break;
    259         }
    260         q.push_back(p);
    261       }
    262     }
    263   }
    264 
    265   if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
    266     if (error) *error = true;
    267     return false;
    268   }
    269 
    270   return ret;
    271 }
    272 
    273 }  // namespace fst
    274 
    275 #endif  // FST_LIB_EQUIVALENT_H__
    276