Home | History | Annotate | Download | only in fst
      1 // state-table.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes for representing the mapping between state tuples and state Ids.
     20 
     21 #ifndef FST_LIB_STATE_TABLE_H__
     22 #define FST_LIB_STATE_TABLE_H__
     23 
     24 #include <deque>
     25 using std::deque;
     26 #include <vector>
     27 using std::vector;
     28 
     29 #include <fst/bi-table.h>
     30 #include <fst/expanded-fst.h>
     31 
     32 
     33 namespace fst {
     34 
     35 // STATE TABLES - these determine the bijective mapping between state
     36 // tuples (e.g. in composition triples of two FST states and a
     37 // composition filter state) and their corresponding state IDs.
     38 // They are classes, templated on state tuples, of the form:
     39 //
     40 // template <class T>
     41 // class StateTable {
     42 //  public:
     43 //   typedef typename T StateTuple;
     44 //
     45 //   // Required constructors.
     46 //   StateTable();
     47 //
     48 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
     49 //   StateId FindState(const StateTuple &);
     50 //   // Lookup state tuple by state ID.
     51 //   const StateTuple<StateId> &Tuple(StateId) const;
     52 //   // # of stored tuples.
     53 //   StateId Size() const;
     54 // };
     55 //
     56 // A state tuple has the form:
     57 //
     58 // template <class S>
     59 // struct StateTuple {
     60 //   typedef typename S StateId;
     61 //
     62 //   // Required constructors.
     63 //   StateTuple();
     64 //   StateTuple(const StateTuple &);
     65 // };
     66 
     67 
     68 // An implementation using a hash map for the tuple to state ID mapping.
     69 // The state tuple T must have == defined. H is the hash function.
     70 template <class T, class H>
     71 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
     72  public:
     73   typedef T StateTuple;
     74   typedef typename StateTuple::StateId StateId;
     75   using HashBiTable<StateId, T, H>::FindId;
     76   using HashBiTable<StateId, T, H>::FindEntry;
     77   using HashBiTable<StateId, T, H>::Size;
     78 
     79   HashStateTable() : HashBiTable<StateId, T, H>() {}
     80 
     81   // Reserves space for table_size elements.
     82   explicit HashStateTable(size_t table_size)
     83       : HashBiTable<StateId, T, H>(table_size) {}
     84 
     85   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
     86   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
     87 };
     88 
     89 
     90 // An implementation using a hash map for the tuple to state ID mapping.
     91 // The state tuple T must have == defined. H is the hash function.
     92 template <class T, class H>
     93 class CompactHashStateTable
     94     : public CompactHashBiTable<typename T::StateId, T, H> {
     95  public:
     96   typedef T StateTuple;
     97   typedef typename StateTuple::StateId StateId;
     98   using CompactHashBiTable<StateId, T, H>::FindId;
     99   using CompactHashBiTable<StateId, T, H>::FindEntry;
    100   using CompactHashBiTable<StateId, T, H>::Size;
    101 
    102   CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
    103 
    104   // Reserves space for 'table_size' elements.
    105   explicit CompactHashStateTable(size_t table_size)
    106       : CompactHashBiTable<StateId, T, H>(table_size) {}
    107 
    108   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    109   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    110 };
    111 
    112 // An implementation using a vector for the tuple to state mapping.
    113 // It is passed a function object FP that should fingerprint tuples
    114 // uniquely to an integer that can used as a vector index. Normally,
    115 // VectorStateTable constructs the FP object.  The user can instead
    116 // pass in this object; in that case, VectorStateTable takes its
    117 // ownership.
    118 template <class T, class FP>
    119 class VectorStateTable
    120     : public VectorBiTable<typename T::StateId, T, FP> {
    121  public:
    122   typedef T StateTuple;
    123   typedef typename StateTuple::StateId StateId;
    124   using VectorBiTable<StateId, T, FP>::FindId;
    125   using VectorBiTable<StateId, T, FP>::FindEntry;
    126   using VectorBiTable<StateId, T, FP>::Size;
    127   using VectorBiTable<StateId, T, FP>::Fingerprint;
    128 
    129   // Reserves space for 'table_size' elements.
    130   explicit VectorStateTable(FP *fp = 0, size_t table_size = 0)
    131       : VectorBiTable<StateId, T, FP>(fp, table_size) {}
    132 
    133   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    134   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    135 };
    136 
    137 
    138 // An implementation using a vector and a compact hash table. The
    139 // selecting functor S returns true for tuples to be hashed in the
    140 // vector.  The fingerprinting functor FP returns a unique fingerprint
    141 // for each tuple to be hashed in the vector (these need to be
    142 // suitable for indexing in a vector).  The hash functor H is used when
    143 // hashing tuple into the compact hash table.
    144 template <class T, class S, class FP, class H>
    145 class VectorHashStateTable
    146     : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
    147  public:
    148   typedef T StateTuple;
    149   typedef typename StateTuple::StateId StateId;
    150   using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
    151   using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
    152   using VectorHashBiTable<StateId, T, S, FP, H>::Size;
    153   using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
    154   using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
    155   using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
    156 
    157   VectorHashStateTable(S *s, FP *fp, H *h,
    158                        size_t vector_size = 0,
    159                        size_t tuple_size = 0)
    160       : VectorHashBiTable<StateId, T, S, FP, H>(
    161           s, fp, h, vector_size, tuple_size) {}
    162 
    163   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    164   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    165 };
    166 
    167 
    168 // An implementation using a hash map for the tuple to state ID
    169 // mapping. This version permits erasing of states.  The state tuple T
    170 // must have == defined and its default constructor must produce a
    171 // tuple that will never be seen. F is the hash function.
    172 template <class T, class F>
    173 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
    174  public:
    175   typedef T StateTuple;
    176   typedef typename StateTuple::StateId StateId;
    177   using ErasableBiTable<StateId, T, F>::FindId;
    178   using ErasableBiTable<StateId, T, F>::FindEntry;
    179   using ErasableBiTable<StateId, T, F>::Size;
    180   using ErasableBiTable<StateId, T, F>::Erase;
    181 
    182   ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
    183   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
    184   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
    185 };
    186 
    187 //
    188 // COMPOSITION STATE TUPLES AND TABLES
    189 //
    190 // The composition state table has the form:
    191 //
    192 // template <class A, class F>
    193 // class ComposeStateTable {
    194 //  public:
    195 //   typedef A Arc;
    196 //   typedef F FilterState;
    197 //   typedef typename A::StateId StateId;
    198 //   typedef ComposeStateTuple<StateId> StateTuple;
    199 //
    200 //   // Required constructors. Copy constructor does not copy state.
    201 //   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
    202 //   ComposeStateTable(const ComposeStateTable<A, F> &table);
    203 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
    204 //   StateId FindState(const StateTuple &);
    205 //   // Lookup state tuple by state ID.
    206 //   const StateTuple<StateId> &Tuple(StateId) const;
    207 //   // # of stored tuples.
    208 //   StateId Size() const;
    209 //   // Return true if error encountered
    210 //   bool Error() const;
    211 // };
    212 
    213 // Represents the composition state.
    214 template <typename S, typename F>
    215 struct ComposeStateTuple {
    216   typedef S StateId;
    217   typedef F FilterState;
    218 
    219   ComposeStateTuple()
    220       : state_id1(kNoStateId), state_id2(kNoStateId),
    221         filter_state(FilterState::NoState()) {}
    222 
    223   ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
    224       : state_id1(s1), state_id2(s2), filter_state(f) {}
    225 
    226   StateId state_id1;         // State Id on fst1
    227   StateId state_id2;         // State Id on fst2
    228   FilterState filter_state;  // State of composition filter
    229 };
    230 
    231 // Equality of composition state tuples.
    232 template <typename S, typename F>
    233 inline bool operator==(const ComposeStateTuple<S, F>& x,
    234                        const ComposeStateTuple<S, F>& y) {
    235   if (&x == &y)
    236     return true;
    237   return x.state_id1 == y.state_id1 &&
    238       x.state_id2 == y.state_id2 &&
    239       x.filter_state == y.filter_state;
    240 }
    241 
    242 
    243 // Hashing of composition state tuples.
    244 template <typename S, typename F>
    245 class ComposeHash {
    246  public:
    247   size_t operator()(const ComposeStateTuple<S, F>& t) const {
    248     return t.state_id1 + t.state_id2 * kPrime0 +
    249         t.filter_state.Hash() * kPrime1;
    250   }
    251  private:
    252   static const size_t kPrime0;
    253   static const size_t kPrime1;
    254 };
    255 
    256 template <typename S, typename F>
    257 const size_t ComposeHash<S, F>::kPrime0 = 7853;
    258 
    259 template <typename S, typename F>
    260 const size_t ComposeHash<S, F>::kPrime1 = 7867;
    261 
    262 
    263 // A HashStateTable over composition tuples.
    264 template <typename A,
    265           typename F,
    266           typename H =
    267           CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
    268                                 ComposeHash<typename A::StateId, F> > >
    269 class GenericComposeStateTable : public H {
    270  public:
    271   typedef A Arc;
    272   typedef typename A::StateId StateId;
    273   typedef F FilterState;
    274   typedef ComposeStateTuple<StateId, F> StateTuple;
    275 
    276   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
    277 
    278   // Reserves space for 'table_size' elements.
    279   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
    280                            size_t table_size) : H(table_size) {}
    281 
    282   bool Error() const { return false; }
    283 
    284  private:
    285   void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
    286 };
    287 
    288 
    289 //  Fingerprint for general composition tuples.
    290 template  <typename S, typename F>
    291 class ComposeFingerprint {
    292  public:
    293   typedef S StateId;
    294   typedef F FilterState;
    295   typedef ComposeStateTuple<S, F> StateTuple;
    296 
    297   // Required but suboptimal constructor.
    298   ComposeFingerprint() : mult1_(8192), mult2_(8192) {
    299     LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
    300   }
    301 
    302   // Constructor is provided the sizes of the input FSTs
    303   ComposeFingerprint(StateId nstates1, StateId nstates2)
    304       : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
    305 
    306   size_t operator()(const StateTuple &tuple) {
    307     return tuple.state_id1 + tuple.state_id2 * mult1_ +
    308         tuple.filter_state.Hash() * mult2_;
    309   }
    310 
    311  private:
    312   ssize_t mult1_;
    313   ssize_t mult2_;
    314 };
    315 
    316 
    317 // Useful when the first composition state determines the tuple.
    318 template  <typename S, typename F>
    319 class ComposeState1Fingerprint {
    320  public:
    321   typedef S StateId;
    322   typedef F FilterState;
    323   typedef ComposeStateTuple<S, F> StateTuple;
    324 
    325   size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
    326 };
    327 
    328 
    329 // Useful when the second composition state determines the tuple.
    330 template  <typename S, typename F>
    331 class ComposeState2Fingerprint {
    332  public:
    333   typedef S StateId;
    334   typedef F FilterState;
    335   typedef ComposeStateTuple<S, F> StateTuple;
    336 
    337   size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
    338 };
    339 
    340 
    341 // A VectorStateTable over composition tuples.  This can be used when
    342 // the product of number of states in FST1 and FST2 (and the
    343 // composition filter state hash) is manageable. If the FSTs are not
    344 // expanded Fsts, they will first have their states counted.
    345 template  <typename A, typename F>
    346 class ProductComposeStateTable : public
    347 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    348                  ComposeFingerprint<typename A::StateId, F> > {
    349  public:
    350   typedef A Arc;
    351   typedef typename A::StateId StateId;
    352   typedef F FilterState;
    353   typedef ComposeStateTuple<StateId, F> StateTuple;
    354   typedef VectorStateTable<StateTuple,
    355                            ComposeFingerprint<StateId, F> > StateTable;
    356 
    357   // Reserves space for 'table_size' elements.
    358   ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
    359                            size_t table_size = 0)
    360       : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1),
    361                                                       CountStates(fst2)),
    362                    table_size) {}
    363 
    364   ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
    365       : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
    366 
    367   bool Error() const { return false; }
    368 
    369  private:
    370   void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
    371 };
    372 
    373 // A VectorStateTable over composition tuples.  This can be used when
    374 // FST1 is a string (satisfies kStringProperties) and FST2 is
    375 // epsilon-free and deterministic. It should be used with a
    376 // composition filter that creates at most one filter state per tuple
    377 // under these conditions (e.g. SequenceComposeFilter or
    378 // MatchComposeFilter).
    379 template <typename A, typename F>
    380 class StringDetComposeStateTable : public
    381 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    382                  ComposeState1Fingerprint<typename A::StateId, F> > {
    383  public:
    384   typedef A Arc;
    385   typedef typename A::StateId StateId;
    386   typedef F FilterState;
    387   typedef ComposeStateTuple<StateId, F> StateTuple;
    388   typedef VectorStateTable<StateTuple,
    389                            ComposeState1Fingerprint<StateId, F> > StateTable;
    390 
    391   StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
    392       : error_(false) {
    393     uint64 props1 = kString;
    394     uint64 props2 = kIDeterministic | kNoIEpsilons;
    395     if (fst1.Properties(props1, true) != props1 ||
    396         fst2.Properties(props2, true) != props2) {
    397       FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
    398                  << " fst2 not input deterministic and epsilon-free";
    399       error_ = true;
    400     }
    401   }
    402 
    403   StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
    404       : StateTable(table), error_(table.error_) {}
    405 
    406   bool Error() const { return error_; }
    407 
    408  private:
    409   bool error_;
    410 
    411   void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
    412 };
    413 
    414 
    415 // A VectorStateTable over composition tuples.  This can be used when
    416 // FST2 is a string (satisfies kStringProperties) and FST1 is
    417 // epsilon-free and deterministic. It should be used with a
    418 // composition filter that creates at most one filter state per tuple
    419 // under these conditions (e.g. SequenceComposeFilter or
    420 // MatchComposeFilter).
    421 template <typename A, typename F>
    422 class DetStringComposeStateTable : public
    423 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
    424                  ComposeState2Fingerprint<typename A::StateId, F> > {
    425  public:
    426   typedef A Arc;
    427   typedef typename A::StateId StateId;
    428   typedef F FilterState;
    429   typedef ComposeStateTuple<StateId, F> StateTuple;
    430   typedef VectorStateTable<StateTuple,
    431                            ComposeState2Fingerprint<StateId, F> > StateTable;
    432 
    433   DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
    434       :error_(false) {
    435     uint64 props1 = kODeterministic | kNoOEpsilons;
    436     uint64 props2 = kString;
    437     if (fst1.Properties(props1, true) != props1 ||
    438         fst2.Properties(props2, true) != props2) {
    439       FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
    440                  << " fst1 not output deterministic and epsilon-free";
    441       error_ = true;
    442     }
    443   }
    444 
    445   DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
    446       : StateTable(table), error_(table.error_) {}
    447 
    448   bool Error() const { return error_; }
    449 
    450  private:
    451   bool error_;
    452 
    453   void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
    454 };
    455 
    456 
    457 // An ErasableStateTable over composition tuples. The Erase(StateId) method
    458 // can be called if the user either is sure that composition will never return
    459 // to that tuple or doesn't care that if it does, it is assigned a new
    460 // state ID.
    461 template <typename A, typename F>
    462 class ErasableComposeStateTable : public
    463 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
    464                    ComposeHash<typename A::StateId, F> > {
    465  public:
    466   typedef A Arc;
    467   typedef typename A::StateId StateId;
    468   typedef F FilterState;
    469   typedef ComposeStateTuple<StateId, F> StateTuple;
    470 
    471   ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
    472 
    473   bool Error() const { return false; }
    474 
    475  private:
    476   void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
    477 };
    478 
    479 }  // namespace fst
    480 
    481 #endif  // FST_LIB_STATE_TABLE_H__
    482