Home | History | Annotate | Download | only in fst
      1 // state-table.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes for representing the mapping between state tuples and state Ids.
     20 
     21 #ifndef FST_LIB_STATE_TABLE_H__
     22 #define FST_LIB_STATE_TABLE_H__
     23 
     24 #include <deque>
     25 #include <vector>
     26 using std::vector;
     27 
     28 #include <fst/bi-table.h>
     29 #include <fst/expanded-fst.h>
     30 
     31 
     32 namespace fst {
     33 
     34 // STATE TABLES - these determine the bijective mapping between state
     35 // tuples (e.g. in composition triples of two FST states and a
     36 // composition filter state) and their corresponding state IDs.
     37 // They are classes, templated on state tuples, of the form:
     38 //
     39 // template <class T>
     40 // class StateTable {
     41 //  public:
     42 //   typedef typename T StateTuple;
     43 //
     44 //   // Required constructors.
     45 //   StateTable();
     46 //
     47 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
     48 //   StateId FindState(const StateTuple &);
     49 //   // Lookup state tuple by state ID.
     50 //   const StateTuple<StateId> &Tuple(StateId) const;
     51 //   // # of stored tuples.
     52 //   StateId Size() const;
     53 // };
     54 //
     55 // A state tuple has the form:
     56 //
     57 // template <class S>
     58 // struct StateTuple {
     59 //   typedef typename S StateId;
     60 //
     61 //   // Required constructor.
     62 //   StateTuple();
     63 // };
     64 
     65 
     66 // An implementation using a hash map for the tuple to state ID mapping.
     67 // The state tuple T must have == defined and the default constructor
     68 // must produce a tuple that will never be seen. H is the hash function.
     69 template <class T, class H>
     70 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
     71  public:
     72   typedef T StateTuple;
     73   typedef typename StateTuple::StateId StateId;
     74   using HashBiTable<StateId, T, H>::FindId;
     75   using HashBiTable<StateId, T, H>::FindEntry;
     76   using HashBiTable<StateId, T, H>::Size;
     77 
     78   HashStateTable() : HashBiTable<StateId, T, H>() {}
     79   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
     80   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
     81 };
     82 
     83 
     84 // An implementation using a hash set for the tuple to state ID
     85 // mapping. The state tuple T must have == defined and the default
     86 // constructor must produce a tuple that will never be seen. H is the
     87 // hash function.
     88 template <class T, class H>
     89 class CompactHashStateTable
     90     : public CompactHashBiTable<typename T::StateId, T, H> {
     91  public:
     92   typedef T StateTuple;
     93   typedef typename StateTuple::StateId StateId;
     94   using CompactHashBiTable<StateId, T, H>::FindId;
     95   using CompactHashBiTable<StateId, T, H>::FindEntry;
     96   using CompactHashBiTable<StateId, T, H>::Size;
     97 
     98   CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
     99 
    100   // Reserves space for table_size elements.
    101   explicit CompactHashStateTable(size_t table_size)
    102       : CompactHashBiTable<StateId, T, H>(table_size) {}
    103 
    104   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    105   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    106 };
    107 
    108 // An implementation using a vector for the tuple to state mapping.
    109 // It is passed a function object FP that should fingerprint tuples
    110 // uniquely to an integer that can used as a vector index. Normally,
    111 // VectorStateTable constructs the FP object.  The user can instead
    112 // pass in this object; in that case, VectorStateTable takes its
    113 // ownership.
    114 template <class T, class FP>
    115 class VectorStateTable
    116     : public VectorBiTable<typename T::StateId, T, FP> {
    117  public:
    118   typedef T StateTuple;
    119   typedef typename StateTuple::StateId StateId;
    120   using VectorBiTable<StateId, T, FP>::FindId;
    121   using VectorBiTable<StateId, T, FP>::FindEntry;
    122   using VectorBiTable<StateId, T, FP>::Size;
    123   using VectorBiTable<StateId, T, FP>::Fingerprint;
    124 
    125   explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
    126   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    127   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    128 };
    129 
    130 
    131 // An implementation using a vector and a compact hash table. The
    132 // selecting functor S returns true for tuples to be hashed in the
    133 // vector.  The fingerprinting functor FP returns a unique fingerprint
    134 // for each tuple to be hashed in the vector (these need to be
    135 // suitable for indexing in a vector).  The hash functor H is used when
    136 // hashing tuple into the compact hash table.
    137 template <class T, class S, class FP, class H>
    138 class VectorHashStateTable
    139     : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
    140  public:
    141   typedef T StateTuple;
    142   typedef typename StateTuple::StateId StateId;
    143   using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
    144   using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
    145   using VectorHashBiTable<StateId, T, S, FP, H>::Size;
    146   using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
    147   using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
    148   using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
    149 
    150   VectorHashStateTable(S *s, FP *fp, H *h,
    151                        size_t vector_size = 0,
    152                        size_t tuple_size = 0)
    153       : VectorHashBiTable<StateId, T, S, FP, H>(
    154           s, fp, h, vector_size, tuple_size) {}
    155 
    156   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    157   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    158 };
    159 
    160 
    161 // An implementation using a hash map for the tuple to state ID
    162 // mapping. This version permits erasing of states.  The state tuple T
    163 // must have == defined and its default constructor must produce a
    164 // tuple that will never be seen. F is the hash function.
    165 template <class T, class F>
    166 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
    167  public:
    168   typedef T StateTuple;
    169   typedef typename StateTuple::StateId StateId;
    170   using ErasableBiTable<StateId, T, F>::FindId;
    171   using ErasableBiTable<StateId, T, F>::FindEntry;
    172   using ErasableBiTable<StateId, T, F>::Size;
    173   using ErasableBiTable<StateId, T, F>::Erase;
    174 
    175   ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
    176   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    177   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    178 };
    179 
    180 //
    181 // COMPOSITION STATE TUPLES AND TABLES
    182 //
    183 // The composition state table has the form:
    184 //
    185 // template <class A, class F>
    186 // class ComposeStateTable {
    187 //  public:
    188 //   typedef A Arc;
    189 //   typedef F FilterState;
    190 //   typedef typename A::StateId StateId;
    191 //   typedef ComposeStateTuple<StateId> StateTuple;
    192 //
    193 //   // Required constructors. Copy constructor does not copy state.
    194 //   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
    195 //   ComposeStateTable(const ComposeStateTable<A, F> &table);
    196 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
    197 //   StateId FindState(const StateTuple &);
    198 //   // Lookup state tuple by state ID.
    199 //   const StateTuple<StateId> &Tuple(StateId) const;
    200 //   // # of stored tuples.
    201 //   StateId Size() const;
    202 //   // Return true if error encountered
    203 //   bool Error() const;
    204 // };
    205 
    206 // Represents the composition state.
    207 template <typename S, typename F>
    208 struct ComposeStateTuple {
    209   typedef S StateId;
    210   typedef F FilterState;
    211 
    212   ComposeStateTuple()
    213       : state_id1(kNoStateId), state_id2(kNoStateId),
    214         filter_state(FilterState::NoState()) {}
    215 
    216   ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
    217       : state_id1(s1), state_id2(s2), filter_state(f) {}
    218 
    219   StateId state_id1;         // State Id on fst1
    220   StateId state_id2;         // State Id on fst2
    221   FilterState filter_state;  // State of composition filter
    222 };
    223 
    224 // Equality of composition state tuples.
    225 template <typename S, typename F>
    226 inline bool operator==(const ComposeStateTuple<S, F>& x,
    227                        const ComposeStateTuple<S, F>& y) {
    228   if (&x == &y)
    229     return true;
    230   return x.state_id1 == y.state_id1 &&
    231       x.state_id2 == y.state_id2 &&
    232       x.filter_state == y.filter_state;
    233 }
    234 
    235 
    236 // Hashing of composition state tuples.
    237 template <typename S, typename F>
    238 class ComposeHash {
    239  public:
    240   size_t operator()(const ComposeStateTuple<S, F>& t) const {
    241     return t.state_id1 + t.state_id2 * kPrime0 +
    242         t.filter_state.Hash() * kPrime1;
    243   }
    244  private:
    245   static const size_t kPrime0;
    246   static const size_t kPrime1;
    247 };
    248 
    249 template <typename S, typename F>
    250 const size_t ComposeHash<S, F>::kPrime0 = 7853;
    251 
    252 template <typename S, typename F>
    253 const size_t ComposeHash<S, F>::kPrime1 = 7867;
    254 
    255 
    256 // A HashStateTable over composition tuples.
    257 template <typename A,
    258           typename F,
    259           typename H =
    260           CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
    261                                 ComposeHash<typename A::StateId, F> > >
    262 class GenericComposeStateTable : public H {
    263  public:
    264   typedef A Arc;
    265   typedef typename A::StateId StateId;
    266   typedef F FilterState;
    267   typedef ComposeStateTuple<StateId, F> StateTuple;
    268 
    269   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
    270 
    271   GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
    272 
    273   bool Error() const { return false; }
    274 
    275  private:
    276   void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
    277 };
    278 
    279 
    280 //  Fingerprint for general composition tuples.
    281 template  <typename S, typename F>
    282 class ComposeFingerprint {
    283  public:
    284   typedef S StateId;
    285   typedef F FilterState;
    286   typedef ComposeStateTuple<S, F> StateTuple;
    287 
    288   // Required but suboptimal constructor.
    289   ComposeFingerprint() : mult1_(8192), mult2_(8192) {
    290     LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
    291   }
    292 
    293   // Constructor is provided the sizes of the input FSTs
    294   ComposeFingerprint(StateId nstates1, StateId nstates2)
    295       : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
    296 
    297   size_t operator()(const StateTuple &tuple) {
    298     return tuple.state_id1 + tuple.state_id2 * mult1_ +
    299         tuple.filter_state.Hash() * mult2_;
    300   }
    301 
    302  private:
    303   ssize_t mult1_;
    304   ssize_t mult2_;
    305 };
    306 
    307 
    308 // Useful when the first composition state determines the tuple.
    309 template  <typename S, typename F>
    310 class ComposeState1Fingerprint {
    311  public:
    312   typedef S StateId;
    313   typedef F FilterState;
    314   typedef ComposeStateTuple<S, F> StateTuple;
    315 
    316   size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
    317 };
    318 
    319 
    320 // Useful when the second composition state determines the tuple.
    321 template  <typename S, typename F>
    322 class ComposeState2Fingerprint {
    323  public:
    324   typedef S StateId;
    325   typedef F FilterState;
    326   typedef ComposeStateTuple<S, F> StateTuple;
    327 
    328   size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
    329 };
    330 
    331 
    332 // A VectorStateTable over composition tuples.  This can be used when
    333 // the product of number of states in FST1 and FST2 (and the
    334 // composition filter state hash) is manageable. If the FSTs are not
    335 // expanded Fsts, they will first have their states counted.
    336 template  <typename A, typename F>
    337 class ProductComposeStateTable : public
    338 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    339                  ComposeFingerprint<typename A::StateId, F> > {
    340  public:
    341   typedef A Arc;
    342   typedef typename A::StateId StateId;
    343   typedef F FilterState;
    344   typedef ComposeStateTuple<StateId, F> StateTuple;
    345 
    346   ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
    347       : VectorStateTable<ComposeStateTuple<StateId, F>,
    348                          ComposeFingerprint<StateId, F> >
    349   (new ComposeFingerprint<StateId, F>(CountStates(fst1),
    350                                       CountStates(fst2))) { }
    351 
    352   ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
    353       : VectorStateTable<ComposeStateTuple<StateId, F>,
    354                          ComposeFingerprint<StateId, F> >
    355         (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
    356 
    357   bool Error() const { return false; }
    358 
    359  private:
    360   void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
    361 };
    362 
    363 // A VectorStateTable over composition tuples.  This can be used when
    364 // FST1 is a string (satisfies kStringProperties) and FST2 is
    365 // epsilon-free and deterministic. It should be used with a
    366 // composition filter that creates at most one filter state per tuple
    367 // under these conditions (e.g. SequenceComposeFilter or
    368 // MatchComposeFilter).
    369 template <typename A, typename F>
    370 class StringDetComposeStateTable : public
    371 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    372                  ComposeState1Fingerprint<typename A::StateId, F> > {
    373  public:
    374   typedef A Arc;
    375   typedef typename A::StateId StateId;
    376   typedef F FilterState;
    377   typedef ComposeStateTuple<StateId, F> StateTuple;
    378 
    379   StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
    380       : error_(false) {
    381     uint64 props1 = kString;
    382     uint64 props2 = kIDeterministic | kNoIEpsilons;
    383     if (fst1.Properties(props1, true) != props1 ||
    384         fst2.Properties(props2, true) != props2) {
    385       FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
    386                  << " fst2 not input deterministic and epsilon-free";
    387       error_ = true;
    388     }
    389   }
    390 
    391   StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
    392    : error_(table.error_) {}
    393 
    394   bool Error() const { return error_; }
    395 
    396  private:
    397   bool error_;
    398 
    399   void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
    400 };
    401 
    402 
    403 // A VectorStateTable over composition tuples.  This can be used when
    404 // FST2 is a string (satisfies kStringProperties) and FST1 is
    405 // epsilon-free and deterministic. It should be used with a
    406 // composition filter that creates at most one filter state per tuple
    407 // under these conditions (e.g. SequenceComposeFilter or
    408 // MatchComposeFilter).
    409 template <typename A, typename F>
    410 class DetStringComposeStateTable : public
    411 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    412                  ComposeState1Fingerprint<typename A::StateId, F> > {
    413  public:
    414   typedef A Arc;
    415   typedef typename A::StateId StateId;
    416   typedef F FilterState;
    417   typedef ComposeStateTuple<StateId, F> StateTuple;
    418 
    419   DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
    420       :error_(false) {
    421     uint64 props1 = kODeterministic | kNoOEpsilons;
    422     uint64 props2 = kString;
    423     if (fst1.Properties(props1, true) != props1 ||
    424         fst2.Properties(props2, true) != props2) {
    425       FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
    426                  << " fst1 not output deterministic and epsilon-free";
    427       error_ = true;
    428     }
    429   }
    430 
    431   DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
    432       : error_(table.error_) {}
    433 
    434   bool Error() const { return error_; }
    435 
    436  private:
    437   bool error_;
    438 
    439   void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
    440 };
    441 
    442 
    443 // An ErasableStateTable over composition tuples. The Erase(StateId) method
    444 // can be called if the user either is sure that composition will never return
    445 // to that tuple or doesn't care that if it does, it is assigned a new
    446 // state ID.
    447 template <typename A, typename F>
    448 class ErasableComposeStateTable : public
    449 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
    450                    ComposeHash<typename A::StateId, F> > {
    451  public:
    452   typedef A Arc;
    453   typedef typename A::StateId StateId;
    454   typedef F FilterState;
    455   typedef ComposeStateTuple<StateId, F> StateTuple;
    456 
    457   ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
    458 
    459   ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
    460 
    461   bool Error() const { return false; }
    462 
    463  private:
    464   void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
    465 };
    466 
    467 }  // namespace fst
    468 
    469 #endif  // FST_LIB_STATE_TABLE_H__
    470