Home | History | Annotate | Download | only in lib
      1 // equivalent.h
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 //
     16 // \file Functions and classes to determine the equivalence of two
     17 // FSTs.
     18 
     19 #ifndef FST_LIB_EQUIVALENT_H__
     20 #define FST_LIB_EQUIVALENT_H__
     21 
#include <algorithm>
#include <deque>
#include <unordered_map>
#include <utility>

#include "fst/lib/encode.h"
#include "fst/lib/push.h"
#include "fst/lib/union-find.h"
#include "fst/lib/vector-fst.h"
     30 
     31 namespace fst {
     32 
     33 // Traits-like struct holding utility functions/typedefs/constants for
     34 // the equivalence algorithm.
     35 //
     36 // Encoding device: in order to make the statesets of the two acceptors
     37 // disjoint, we map Arc::StateId on the type MappedId. The states of
     38 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
     39 // those of the second one on even numbers (s -> 2s + 2). The number 0
     40 // is reserved for an implicit (non-final) 'dead state' (required for
     41 // the correct treatment of non-coaccessible states; kNoStateId is
     42 // mapped to kDeadState for both acceptors). The union-find algorithm
     43 // operates on the mapped IDs.
     44 template <class Arc>
     45 struct EquivalenceUtil {
     46   typedef typename Arc::StateId StateId;
     47   typedef typename Arc::Weight Weight;
     48   typedef int32 MappedId;  // ID for an equivalence class.
     49 
     50   // MappedId for an implicit dead state.
     51   static const MappedId kDeadState = 0;
     52 
     53   // MappedId for lookup failure.
     54   static const MappedId kInvalidId = -1;
     55 
     56   // Maps state ID to the representative of the corresponding
     57   // equivalence class. The parameter 'which_fst' takes the values 1
     58   // and 2, identifying the input FST.
     59   static MappedId MapState(StateId s, int32 which_fst) {
     60     return
     61       (kNoStateId == s)
     62       ?
     63       kDeadState
     64       :
     65       (static_cast<MappedId>(s) << 1) + which_fst;
     66   }
     67   // Maps set ID to State ID.
     68   static StateId UnMapState(MappedId id) {
     69     return static_cast<StateId>((--id) >> 1);
     70   }
     71   // Convenience function: checks if state with MappedId 's' is final
     72   // in acceptor 'fa'.
     73   static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
     74     return
     75       (kDeadState == s) ?
     76       false : (fa.Final(UnMapState(s)) != Weight::Zero());
     77   }
     78   // Convenience function: returns the representative of 'id' in 'sets',
     79   // creating a new set if needed.
     80   static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
     81     MappedId repr = sets->FindSet(id);
     82     if (repr != kInvalidId) {
     83       return repr;
     84     } else {
     85       sets->MakeSet(id);
     86       return id;
     87     }
     88   }
     89 };
     90 
     91 // Equivalence checking algorithm: determines if the two FSTs
     92 // <code>fst1</code> and <code>fst2</code> are equivalent. The input
     93 // FSTs must be deterministic input-side epsilon-free acceptors,
     94 // unweighted or with weights over a left semiring. Two acceptors are
     95 // considered equivalent if they accept exactly the same set of
     96 // strings (with the same weights).
     97 //
     98 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
     99 // Analysis of Computer Programs") successively constructs sets of
    100 // states that can be reached by the same prefixes, starting with a
    101 // set containing the start states of both acceptors. A disjoint tree
    102 // forest (the union-find algorithm) is used to represent the sets of
    103 // states. The algorithm returns 'false' if one of the constructed
    104 // sets contains both final and non-final states.
    105 //
    106 // Complexity: quasi-linear, i.e. O(n G(n)), where
    107 //   n = |S1| + |S2| is the number of states in both acceptors
    108 //   G(n) is a very slowly growing function that can be approximated
    109 //        by 4 by all practical purposes.
    110 //
    111 template <class Arc>
    112 bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
    113   typedef typename Arc::Weight Weight;
    114   // Check properties first:
    115   uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
    116   if (fst1.Properties(props, true) != props) {
    117     LOG(FATAL) << "Equivalent: first argument not an"
    118                << " epsilon-free deterministic acceptor";
    119   }
    120   if (fst2.Properties(props, true) != props) {
    121     LOG(FATAL) << "Equivalent: second argument not an"
    122                << " epsilon-free deterministic acceptor";
    123   }
    124 
    125   if ((fst1.Properties(kUnweighted , true) != kUnweighted)
    126       || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
    127     VectorFst<Arc> efst1(fst1);
    128     VectorFst<Arc> efst2(fst2);
    129     Push(&efst1, REWEIGHT_TO_INITIAL);
    130     Push(&efst2, REWEIGHT_TO_INITIAL);
    131     Map(&efst1, QuantizeMapper<Arc>());
    132     Map(&efst2, QuantizeMapper<Arc>());
    133     EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
    134     Map(&efst1, &mapper);
    135     Map(&efst2, &mapper);
    136     return Equivalent(efst1, efst2);
    137   }
    138 
    139   // Convenience typedefs:
    140   typedef typename Arc::StateId StateId;
    141   typedef EquivalenceUtil<Arc> Util;
    142   typedef typename Util::MappedId MappedId;
    143   enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
    144 
    145   MappedId s1 = Util::MapState(fst1.Start(), FST1);
    146   MappedId s2 = Util::MapState(fst2.Start(), FST2);
    147 
    148   // The union-find structure.
    149   UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
    150 
    151   // Initialize the union-find structure.
    152   eq_classes.MakeSet(s1);
    153   eq_classes.MakeSet(s2);
    154 
    155   // Early return if the start states differ w.r.t. being final.
    156   if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
    157     return false;
    158   }
    159   // Data structure for the (partial) acceptor transition function of
    160   // fst1 and fst2: input labels mapped to pairs of MappedId's
    161   // representing destination states of the corresponding arcs in fst1
    162   // and fst2, respectively.
    163   typedef
    164     std::unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
    165     Label2StatePairMap;
    166 
    167   Label2StatePairMap arc_pairs;
    168 
    169   // Pairs of MappedId's to be processed, organized in a queue.
    170   deque<pair<MappedId, MappedId> > q;
    171 
    172   // Main loop: explores the two acceptors in a breadth-first manner,
    173   // updating the equivalence relation on the statesets. Loop
    174   // invariant: each block of states contains either final states only
    175   // or non-final states only.
    176   for (q.push_back(make_pair(s1, s2)); !q.empty(); q.pop_front()) {
    177     s1 = q.front().first;
    178     s2 = q.front().second;
    179 
    180     // Representatives of the equivalence classes of s1/s2.
    181     MappedId rep1 = Util::FindSet(&eq_classes, s1);
    182     MappedId rep2 = Util::FindSet(&eq_classes, s2);
    183 
    184     if (rep1 != rep2) {
    185       eq_classes.Union(rep1, rep2);
    186       arc_pairs.clear();
    187 
    188       // Copy outgoing arcs starting at s1 into the hashtable.
    189       if (Util::kDeadState != s1) {
    190         ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
    191         for (; !arc_iter.Done(); arc_iter.Next()) {
    192           const Arc &arc = arc_iter.Value();
    193           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
    194                                                    // are treated as
    195                                                    // non-exisitent.
    196             arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
    197           }
    198         }
    199       }
    200       // Copy outgoing arcs starting at s2 into the hashtable.
    201       if (Util::kDeadState != s2) {
    202         ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
    203         for (; !arc_iter.Done(); arc_iter.Next()) {
    204           const Arc &arc = arc_iter.Value();
    205           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
    206                                                    // are treated as
    207                                                    // non-existent.
    208             arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
    209           }
    210         }
    211       }
    212       // Iterate through the hashtable and process pairs of target
    213       // states.
    214       for (typename Label2StatePairMap::const_iterator
    215              arc_iter = arc_pairs.begin();
    216            arc_iter != arc_pairs.end();
    217            ++arc_iter) {
    218         const pair<MappedId, MappedId> &p = arc_iter->second;
    219         if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
    220           // Detected inconsistency: return false.
    221           return false;
    222         }
    223         q.push_back(p);
    224       }
    225     }
    226   }
    227   return true;
    228 }
    229 
    230 }  // namespace fst
    231 
    232 #endif  // FST_LIB_EQUIVALENT_H__
    233