Home | History | Annotate | Download | only in fst
      1 // randequivalent.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: allauzen (at) google.com (Cyril Allauzen)
     17 //
     18 // \file
     19 // Tests if two FSTS are equivalent by checking if random
     20 // strings from one FST are transduced the same by both FSTs.
     21 
     22 #ifndef FST_RANDEQUIVALENT_H__
     23 #define FST_RANDEQUIVALENT_H__
     24 
     25 #include <fst/arcsort.h>
     26 #include <fst/compose.h>
     27 #include <fst/project.h>
     28 #include <fst/randgen.h>
     29 #include <fst/shortest-distance.h>
     30 #include <fst/vector-fst.h>
     31 
     32 
     33 namespace fst {
     34 
     35 // Test if two FSTs are equivalent by randomly generating 'num_paths'
     36 // paths (as specified by the RandGenOptions 'opts') in these FSTs.
     37 //
     38 // For each randomly generated path, the algorithm computes for each
     39 // of the two FSTs the sum of the weights of all the successful paths
     40 // sharing the same input and output labels as the considered randomly
     41 // generated path and checks that these two values are within
     42 // 'delta'. Returns optional error value (when FLAGS_error_fatal = false).
     43 template<class Arc, class ArcSelector>
     44 bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
     45                     ssize_t num_paths, float delta,
     46                     const RandGenOptions<ArcSelector> &opts,
     47                     bool *error = 0) {
     48   typedef typename Arc::Weight Weight;
     49   if (error) *error = false;
     50 
     51   // Check that the symbol table are compatible
     52   if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
     53       !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
     54     FSTERROR() << "RandEquivalent: input/output symbol tables of 1st "
     55                << "argument do not match input/output symbol tables of 2nd "
     56                << "argument";
     57     if (error) *error = true;
     58     return false;
     59   }
     60 
     61   ILabelCompare<Arc> icomp;
     62   OLabelCompare<Arc> ocomp;
     63   VectorFst<Arc> sfst1(fst1);
     64   VectorFst<Arc> sfst2(fst2);
     65   Connect(&sfst1);
     66   Connect(&sfst2);
     67   ArcSort(&sfst1, icomp);
     68   ArcSort(&sfst2, icomp);
     69 
     70   bool ret = true;
     71   for (ssize_t n = 0; n < num_paths; ++n) {
     72     VectorFst<Arc> path;
     73     const Fst<Arc> &fst = rand() % 2 ? sfst1 : sfst2;
     74     RandGen(fst, &path, opts);
     75 
     76     VectorFst<Arc> ipath(path);
     77     VectorFst<Arc> opath(path);
     78     Project(&ipath, PROJECT_INPUT);
     79     Project(&opath, PROJECT_OUTPUT);
     80 
     81     VectorFst<Arc> cfst1, pfst1;
     82     Compose(ipath, sfst1, &cfst1);
     83     ArcSort(&cfst1, ocomp);
     84     Compose(cfst1, opath, &pfst1);
     85     // Give up if there are epsilon cycles in a non-idempotent semiring
     86     if (!(Weight::Properties() & kIdempotent) &&
     87         pfst1.Properties(kCyclic, true))
     88       continue;
     89     Weight sum1 = ShortestDistance(pfst1);
     90 
     91     VectorFst<Arc> cfst2, pfst2;
     92     Compose(ipath, sfst2, &cfst2);
     93     ArcSort(&cfst2, ocomp);
     94     Compose(cfst2, opath, &pfst2);
     95     // Give up if there are epsilon cycles in a non-idempotent semiring
     96     if (!(Weight::Properties() & kIdempotent) &&
     97         pfst2.Properties(kCyclic, true))
     98       continue;
     99     Weight sum2 = ShortestDistance(pfst2);
    100 
    101     if (!ApproxEqual(sum1, sum2, delta)) {
    102         VLOG(1) << "Sum1 = " << sum1;
    103         VLOG(1) << "Sum2 = " << sum2;
    104         ret = false;
    105         break;
    106     }
    107   }
    108 
    109   if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
    110     if (error) *error = true;
    111     return false;
    112   }
    113 
    114   return ret;
    115 }
    116 
    117 
    118 // Test if two FSTs are equivalent by randomly generating 'num_paths' paths
    119 // of length no more than 'path_length' using the seed 'seed' in these FSTs.
    120 // Returns optional error value (when FLAGS_error_fatal = false).
    121 template <class Arc>
    122 bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
    123                     ssize_t num_paths, float delta = kDelta,
    124                     int seed = time(0), int path_length = INT_MAX,
    125                     bool *error = 0) {
    126   UniformArcSelector<Arc> uniform_selector(seed);
    127   RandGenOptions< UniformArcSelector<Arc> >
    128       opts(uniform_selector, path_length);
    129   return RandEquivalent(fst1, fst2, num_paths, delta, opts, error);
    130 }
    131 
    132 
    133 }  // namespace fst
    134 
    135 #endif  // FST_LIB_RANDEQUIVALENT_H__
    136