Home | History | Annotate | Download | only in fst
      1 // compose.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Class to compute the composition of two FSTs
     20 
     21 #ifndef FST_LIB_COMPOSE_H__
     22 #define FST_LIB_COMPOSE_H__
     23 
     24 #include <algorithm>
     25 #include <string>
     26 #include <vector>
     27 using std::vector;
     28 
     29 #include <fst/cache.h>
     30 #include <fst/compose-filter.h>
     31 #include <fst/lookahead-filter.h>
     32 #include <fst/matcher.h>
     33 #include <fst/state-table.h>
     34 #include <fst/test-properties.h>
     35 
     36 
     37 namespace fst {
     38 
     39 // Delayed composition options templated on the arc type, the matcher,
     40 // the composition filter, and the composition state table.  By
     41 // default, the matchers, filter, and state table are constructed by
     42 // composition. If set below, the user can instead pass in these
     43 // objects; in that case, ComposeFst takes their ownership. This
     44 // version controls composition implemented between generic Fst<Arc>
     45 // types and a shared matcher type M for Fst<Arc>. This should be
     46 // adequate for most applications, giving a reasonable tradeoff
     47 // between efficiency and code sharing (but see ComposeFstImplOptions).
     48 template <class A,
     49           class M = Matcher<Fst<A> >,
     50           class F = SequenceComposeFilter<M>,
     51           class T = GenericComposeStateTable<A, typename F::FilterState> >
     52 struct ComposeFstOptions : public CacheOptions {
     53   M *matcher1;      // FST1 matcher (see matcher.h)
     54   M *matcher2;      // FST2 matcher
     55   F *filter;        // Composition filter (see compose-filter.h)
     56   T *state_table;   // Composition state table (see compose-state-table.h)
     57 
     58   explicit ComposeFstOptions(const CacheOptions &opts,
     59                              M *mat1 = 0, M *mat2 = 0,
     60                              F *filt = 0, T *sttable= 0)
     61       : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
     62         filter(filt), state_table(sttable) {}
     63 
     64   ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
     65 };
     66 
     67 
     68 // Delayed composition options templated on the two matcher types, the
     69 // composition filter, and the composition state table.  By default,
     70 // the matchers, filter, and state table are constructed by
     71 // composition. If set below, the user can instead pass in these
     72 // objects; in that case, ComposeFst takes their ownership. This
     73 // version controls composition implemented using arbitrary matchers
     74 // (of the same Arc type but otherwise arbitrary Fst type). The user
     75 // must ensure the matchers are compatible. These options permit the
     76 // most efficient use, but shares the least code. This is for advanced
     77 // use only in the most demanding or specialized applications that can
     78 // benefit from it (o.w. prefer ComposeFstOptions).
     79 template <class M1, class M2,
     80           class F = SequenceComposeFilter<M1, M2>,
     81           class T = GenericComposeStateTable<typename M1::Arc,
     82                                              typename F::FilterState> >
     83 struct ComposeFstImplOptions : public CacheOptions {
     84   M1 *matcher1;     // FST1 matcher (see matcher.h)
     85   M2 *matcher2;     // FST2 matcher
     86   F *filter;        // Composition filter (see compose-filter.h)
     87   T *state_table;   // Composition state table (see compose-state-table.h)
     88 
     89   explicit ComposeFstImplOptions(const CacheOptions &opts,
     90                                  M1 *mat1 = 0, M2 *mat2 = 0,
     91                                  F *filt = 0, T *sttable= 0)
     92       : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
     93         filter(filt), state_table(sttable) {}
     94 
     95   ComposeFstImplOptions()
     96   : matcher1(0), matcher2(0), filter(0), state_table(0) {}
     97 };
     98 
     99 
    100 // Implementation of delayed composition. This base class is
    101 // common to the variants with different matchers, composition filters
    102 // and state tables.
    103 template <class A>
    104 class ComposeFstImplBase : public CacheImpl<A> {
    105  public:
    106   using FstImpl<A>::SetType;
    107   using FstImpl<A>::SetProperties;
    108   using FstImpl<A>::Properties;
    109   using FstImpl<A>::SetInputSymbols;
    110   using FstImpl<A>::SetOutputSymbols;
    111 
    112   using CacheBaseImpl< CacheState<A> >::HasStart;
    113   using CacheBaseImpl< CacheState<A> >::HasFinal;
    114   using CacheBaseImpl< CacheState<A> >::HasArcs;
    115   using CacheBaseImpl< CacheState<A> >::SetFinal;
    116   using CacheBaseImpl< CacheState<A> >::SetStart;
    117 
    118   typedef typename A::Label Label;
    119   typedef typename A::Weight Weight;
    120   typedef typename A::StateId StateId;
    121   typedef CacheState<A> State;
    122 
    123   ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
    124                      const CacheOptions &opts)
    125       : CacheImpl<A>(opts) {
    126     VLOG(2) << "ComposeFst(" << this << "): Begin";
    127     SetType("compose");
    128 
    129     if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
    130       FSTERROR() << "ComposeFst: output symbol table of 1st argument "
    131                  << "does not match input symbol table of 2nd argument";
    132       SetProperties(kError, kError);
    133     }
    134 
    135     SetInputSymbols(fst1.InputSymbols());
    136     SetOutputSymbols(fst2.OutputSymbols());
    137   }
    138 
    139   ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
    140       : CacheImpl<A>(impl, true) {
    141     SetProperties(impl.Properties(), kCopyProperties);
    142     SetInputSymbols(impl.InputSymbols());
    143     SetOutputSymbols(impl.OutputSymbols());
    144   }
    145 
    146   virtual ComposeFstImplBase<A> *Copy() = 0;
    147 
    148   virtual ~ComposeFstImplBase() {}
    149 
    150   StateId Start() {
    151     if (!HasStart()) {
    152       StateId start = ComputeStart();
    153       if (start != kNoStateId) {
    154         SetStart(start);
    155       }
    156     }
    157     return CacheImpl<A>::Start();
    158   }
    159 
    160   Weight Final(StateId s) {
    161     if (!HasFinal(s)) {
    162       Weight final = ComputeFinal(s);
    163       SetFinal(s, final);
    164     }
    165     return CacheImpl<A>::Final(s);
    166   }
    167 
    168   virtual void Expand(StateId s) = 0;
    169 
    170   size_t NumArcs(StateId s) {
    171     if (!HasArcs(s))
    172       Expand(s);
    173     return CacheImpl<A>::NumArcs(s);
    174   }
    175 
    176   size_t NumInputEpsilons(StateId s) {
    177     if (!HasArcs(s))
    178       Expand(s);
    179     return CacheImpl<A>::NumInputEpsilons(s);
    180   }
    181 
    182   size_t NumOutputEpsilons(StateId s) {
    183     if (!HasArcs(s))
    184       Expand(s);
    185     return CacheImpl<A>::NumOutputEpsilons(s);
    186   }
    187 
    188   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
    189     if (!HasArcs(s))
    190       Expand(s);
    191     CacheImpl<A>::InitArcIterator(s, data);
    192   }
    193 
    194  protected:
    195   virtual StateId ComputeStart() = 0;
    196   virtual Weight ComputeFinal(StateId s) = 0;
    197 };
    198 
    199 
    200 // Implementaion of delayed composition templated on the matchers (see
    201 // matcher.h), composition filter (see compose-filter-inl.h) and
    202 // the composition state table (see compose-state-table.h).
    203 template <class M1, class M2, class F, class T>
    204 class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
    205   typedef typename M1::FST FST1;
    206   typedef typename M2::FST FST2;
    207   typedef typename M1::Arc Arc;
    208   typedef typename Arc::StateId StateId;
    209   typedef typename Arc::Label Label;
    210   typedef typename Arc::Weight Weight;
    211   typedef typename F::FilterState FilterState;
    212   typedef typename F::Matcher1 Matcher1;
    213   typedef typename F::Matcher2 Matcher2;
    214 
    215   using CacheBaseImpl<CacheState<Arc> >::SetArcs;
    216   using FstImpl<Arc>::SetType;
    217   using FstImpl<Arc>::SetProperties;
    218 
    219   typedef ComposeStateTuple<StateId, FilterState> StateTuple;
    220 
    221  public:
    222   ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
    223                  const ComposeFstImplOptions<M1, M2, F, T> &opts);
    224 
    225   ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
    226       : ComposeFstImplBase<Arc>(impl),
    227         filter_(new F(*impl.filter_, true)),
    228         matcher1_(filter_->GetMatcher1()),
    229         matcher2_(filter_->GetMatcher2()),
    230         fst1_(matcher1_->GetFst()),
    231         fst2_(matcher2_->GetFst()),
    232         state_table_(new T(*impl.state_table_)),
    233         match_type_(impl.match_type_) {}
    234 
    235   ~ComposeFstImpl() {
    236     VLOG(2) << "ComposeFst(" << this
    237             << "): End: # of visited states: " << state_table_->Size();
    238 
    239     delete filter_;
    240     delete state_table_;
    241   }
    242 
    243   virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
    244     return new ComposeFstImpl<M1, M2, F, T>(*this);
    245   }
    246 
    247   uint64 Properties() const { return Properties(kFstProperties); }
    248 
    249   // Set error if found; return FST impl properties.
    250   uint64 Properties(uint64 mask) const {
    251     if ((mask & kError) &&
    252         (fst1_.Properties(kError, false) ||
    253          fst2_.Properties(kError, false) ||
    254          (matcher1_->Properties(0) & kError) ||
    255          (matcher2_->Properties(0) & kError) |
    256          (filter_->Properties(0) & kError) ||
    257          state_table_->Error())) {
    258       SetProperties(kError, kError);
    259     }
    260     return FstImpl<Arc>::Properties(mask);
    261   }
    262 
    263   // Arranges it so that the first arg to OrderedExpand is the Fst
    264   // that will be matched on.
    265   void Expand(StateId s) {
    266     const StateTuple &tuple = state_table_->Tuple(s);
    267     StateId s1 = tuple.state_id1;
    268     StateId s2 = tuple.state_id2;
    269     filter_->SetState(s1, s2, tuple.filter_state);
    270     if (match_type_ == MATCH_OUTPUT ||
    271         (match_type_ == MATCH_BOTH &&
    272          internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
    273       OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
    274     else
    275       OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
    276   }
    277 
    278   const FST1 &GetFst1() { return fst1_; }
    279   const FST2 &GetFst2() { return fst2_; }
    280   M1 *GetMatcher1() { return matcher1_; }
    281   M2 *GetMatcher2() { return matcher2_; }
    282   F *GetFilter() { return filter_; }
    283   T *GetStateTable() { return state_table_; }
    284 
    285  private:
    286   // This does that actual matching of labels in the composition. The
    287   // arguments are ordered so matching is called on state 'sa' of
    288   // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
    289   // determines whether the input or output label of arcs at 'sb' is
    290   // the one to match on.
    291   template <class FST, class Matcher>
    292   void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
    293                      const FST &fstb, StateId sb,
    294                      Matcher *matchera,  bool match_input) {
    295     matchera->SetState(sa);
    296 
    297     // First process non-consuming symbols (e.g., epsilons) on FSTA.
    298     Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
    299            Weight::One(), sb);
    300     MatchArc(s, matchera, loop, match_input);
    301 
    302     // Then process matches on FSTB.
    303     for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
    304       MatchArc(s, matchera, iterb.Value(), match_input);
    305 
    306     SetArcs(s);
    307   }
    308 
    309   // Matches a single transition from 'fstb' against 'fata' at 's'.
    310   template <class Matcher>
    311   void MatchArc(StateId s, Matcher *matchera,
    312                 const Arc &arc, bool match_input) {
    313     if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
    314       for (; !matchera->Done(); matchera->Next()) {
    315         Arc arca = matchera->Value();
    316         Arc arcb = arc;
    317         if (match_input) {
    318           const FilterState &f = filter_->FilterArc(&arcb, &arca);
    319           if (f != FilterState::NoState())
    320             AddArc(s, arcb, arca, f);
    321         } else {
    322           const FilterState &f = filter_->FilterArc(&arca, &arcb);
    323           if (f != FilterState::NoState())
    324             AddArc(s, arca, arcb, f);
    325         }
    326       }
    327     }
    328   }
    329 
    330   // Add a matching transition at 's'.
    331    void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
    332                const FilterState &f) {
    333     StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
    334     Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
    335            state_table_->FindState(tuple));
    336     CacheImpl<Arc>::PushArc(s, oarc);
    337   }
    338 
    339   StateId ComputeStart() {
    340     StateId s1 = fst1_.Start();
    341     if (s1 == kNoStateId)
    342       return kNoStateId;
    343 
    344     StateId s2 = fst2_.Start();
    345     if (s2 == kNoStateId)
    346       return kNoStateId;
    347 
    348     const FilterState &f = filter_->Start();
    349     StateTuple tuple(s1, s2, f);
    350     return state_table_->FindState(tuple);
    351   }
    352 
    353   Weight ComputeFinal(StateId s) {
    354     const StateTuple &tuple = state_table_->Tuple(s);
    355     StateId s1 = tuple.state_id1;
    356     Weight final1 = internal::Final(fst1_, s1);
    357     if (final1 == Weight::Zero())
    358       return final1;
    359 
    360     StateId s2 = tuple.state_id2;
    361     Weight final2 = internal::Final(fst2_, s2);
    362     if (final2 == Weight::Zero())
    363       return final2;
    364 
    365     filter_->SetState(s1, s2, tuple.filter_state);
    366     filter_->FilterFinal(&final1, &final2);
    367     return Times(final1, final2);
    368   }
    369 
    370   // Identifies and verifies the capabilities of the matcher to be used for
    371   // composition.
    372   void SetMatchType();
    373 
    374   F *filter_;
    375   Matcher1 *matcher1_;
    376   Matcher2 *matcher2_;
    377   const FST1 &fst1_;
    378   const FST2 &fst2_;
    379   T *state_table_;
    380 
    381   MatchType match_type_;
    382 
    383   void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
    384 };
    385 
    386 template <class M1, class M2, class F, class T> inline
    387 ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
    388     const FST1 &fst1, const FST2 &fst2,
    389     const ComposeFstImplOptions<M1, M2, F, T> &opts)
    390     : ComposeFstImplBase<Arc>(fst1, fst2, opts),
    391       filter_(opts.filter ? opts.filter :
    392               new F(fst1, fst2, opts.matcher1, opts.matcher2)),
    393       matcher1_(filter_->GetMatcher1()),
    394       matcher2_(filter_->GetMatcher2()),
    395       fst1_(matcher1_->GetFst()),
    396       fst2_(matcher2_->GetFst()),
    397       state_table_(opts.state_table ? opts.state_table :
    398                    new T(fst1_, fst2_)) {
    399   SetMatchType();
    400   if (match_type_ == MATCH_NONE)
    401     SetProperties(kError, kError);
    402   VLOG(2) << "ComposeFst(" << this << "): Match type: "
    403           << (match_type_ == MATCH_OUTPUT ? "output" :
    404               (match_type_ == MATCH_INPUT ? "input" :
    405                (match_type_ == MATCH_BOTH ? "both" :
    406                 (match_type_ == MATCH_NONE ? "none" : "unknown"))));
    407 
    408   uint64 fprops1 = fst1.Properties(kFstProperties, false);
    409   uint64 fprops2 = fst2.Properties(kFstProperties, false);
    410   uint64 mprops1 = matcher1_->Properties(fprops1);
    411   uint64 mprops2 = matcher2_->Properties(fprops2);
    412   uint64 cprops = ComposeProperties(mprops1, mprops2);
    413   SetProperties(filter_->Properties(cprops), kCopyProperties);
    414   if (state_table_->Error()) SetProperties(kError, kError);
    415   VLOG(2) << "ComposeFst(" << this << "): Initialized";
    416 }
    417 
    418 template <class M1, class M2, class F, class T>
    419 void ComposeFstImpl<M1, M2, F, T>::SetMatchType() {
    420   MatchType type1 = matcher1_->Type(false);
    421   MatchType type2 = matcher2_->Type(false);
    422   uint32 flags1 = matcher1_->Flags();
    423   uint32 flags2 = matcher2_->Flags();
    424   if (flags1 & flags2 & kRequireMatch) {
    425     FSTERROR() << "ComposeFst: only one argument can require matching.";
    426     match_type_ = MATCH_NONE;
    427   } else if (flags1 & kRequireMatch) {
    428     if (matcher1_->Type(true) != MATCH_OUTPUT) {
    429       FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
    430       match_type_ = MATCH_NONE;
    431     }
    432     match_type_ = MATCH_OUTPUT;
    433   } else if (flags2 & kRequireMatch) {
    434     if (matcher2_->Type(true) != MATCH_INPUT) {
    435       FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
    436       match_type_ = MATCH_NONE;
    437     }
    438     match_type_ = MATCH_INPUT;
    439   } else if (flags1 & flags2 & kPreferMatch &&
    440              type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
    441     match_type_ = MATCH_BOTH;
    442   } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) {
    443     match_type_ = MATCH_OUTPUT;
    444   } else if  (flags2 & kPreferMatch && type2 == MATCH_INPUT) {
    445     match_type_ = MATCH_INPUT;
    446   } else if (type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
    447     match_type_ = MATCH_BOTH;
    448   } else if (type1 == MATCH_OUTPUT) {
    449     match_type_ = MATCH_OUTPUT;
    450   } else if (type2 == MATCH_INPUT) {
    451     match_type_ = MATCH_INPUT;
    452   } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) {
    453     match_type_ = MATCH_OUTPUT;
    454   } else if  (flags2 & kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) {
    455     match_type_ = MATCH_INPUT;
    456   } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
    457     match_type_ = MATCH_OUTPUT;
    458   } else if (matcher2_->Type(true) == MATCH_INPUT) {
    459     match_type_ = MATCH_INPUT;
    460   } else {
    461     FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
    462                << "and 2nd argument cannot match on input labels (sort?).";
    463     match_type_ = MATCH_NONE;
    464   }
    465 }
    466 
    467 
    468 // Computes the composition of two transducers. This version is a
    469 // delayed Fst. If FST1 transduces string x to y with weight a and FST2
    470 // transduces y to z with weight b, then their composition transduces
    471 // string x to z with weight Times(x, z).
    472 //
    473 // The output labels of the first transducer or the input labels of
    474 // the second transducer must be sorted (with the default matcher).
    475 // The weights need to form a commutative semiring (valid for
    476 // TropicalWeight and LogWeight).
    477 //
    478 // Complexity:
    479 // Assuming the first FST is unsorted and the second is sorted:
    480 // - Time: O(v1 v2 d1 (log d2 + m2)),
    481 // - Space: O(v1 v2)
    482 // where vi = # of states visited, di = maximum out-degree, and mi the
    483 // maximum multiplicity of the states visited for the ith
    484 // FST. Constant time and space to visit an input state or arc is
    485 // assumed and exclusive of caching.
    486 //
    487 // Caveats:
    488 // - ComposeFst does not trim its output (since it is a delayed operation).
    489 // - The efficiency of composition can be strongly affected by several factors:
    490 //   - the choice of which tnansducer is sorted - prefer sorting the FST
    491 //     that has the greater average out-degree.
    492 //   - the amount of non-determinism
    493 //   - the presence and location of epsilon transitions - avoid epsilon
    494 //     transitions on the output side of the first transducer or
    495 //     the input side of the second transducer or prefer placing
    496 //     them later in a path since they delay matching and can
    497 //     introduce non-coaccessible states and transitions.
    498 //
    499 // This class attaches interface to implementation and handles
    500 // reference counting, delegating most methods to ImplToFst.
    501 template <class A>
    502 class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
    503  public:
    504   friend class ArcIterator< ComposeFst<A> >;
    505   friend class StateIterator< ComposeFst<A> >;
    506 
    507   typedef A Arc;
    508   typedef typename A::Weight Weight;
    509   typedef typename A::StateId StateId;
    510   typedef CacheState<A> State;
    511   typedef ComposeFstImplBase<A> Impl;
    512 
    513   using ImplToFst<Impl>::SetImpl;
    514 
    515   // Compose specifying only caching options.
    516   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
    517              const CacheOptions &opts = CacheOptions())
    518       : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
    519 
    520   // Compose specifying one shared matcher type M.  Requires input
    521   // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
    522   // best code-sharing and matcher compatiblity.
    523   template <class M, class F, class T>
    524   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
    525              const ComposeFstOptions<A, M, F, T> &opts)
    526       : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
    527 
    528   // Compose specifying two matcher types M1 and M2.  Requires input
    529   // Fsts (of the same Arc type but o.w. arbitrary) match the
    530   // corresponding matcher FST types (M1::FST, M2::FST). Recommended
    531   // only for advanced use in demanding or specialized applications
    532   // due to potential code bloat and matcher incompatibilities.
    533   template <class M1, class M2, class F, class T>
    534   ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
    535              const ComposeFstImplOptions<M1, M2, F, T> &opts)
    536       : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
    537 
    538   // See Fst<>::Copy() for doc.
    539   ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
    540     if (safe)
    541       SetImpl(fst.GetImpl()->Copy());
    542     else
    543       SetImpl(fst.GetImpl(), false);
    544   }
    545 
    546   // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
    547   virtual ComposeFst<A> *Copy(bool safe = false) const {
    548     return new ComposeFst<A>(*this, safe);
    549   }
    550 
    551   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
    552 
    553   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
    554     GetImpl()->InitArcIterator(s, data);
    555   }
    556 
    557  protected:
    558   ComposeFst() {}
    559 
    560   // Create compose implementation specifying two matcher types.
    561   template <class M1, class M2, class F, class T>
    562   static Impl *CreateBase2(
    563       const typename M1::FST &fst1, const typename M2::FST &fst2,
    564       const ComposeFstImplOptions<M1, M2, F, T> &opts) {
    565     Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
    566     if (!(Weight::Properties() & kCommutative)) {
    567       int64 props1 = fst1.Properties(kUnweighted, true);
    568       int64 props2 = fst2.Properties(kUnweighted, true);
    569       if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
    570         FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
    571                    << Weight::Type();
    572         impl->SetProperties(kError, kError);
    573       }
    574     }
    575     return impl;
    576   }
    577 
    578   // Create compose implementation specifying one matcher type.
    579   //  Requires input Fsts and matcher FST type (M::FST) be Fst<A>
    580   template <class M, class F, class T>
    581   static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
    582                            const ComposeFstOptions<A, M, F, T> &opts) {
    583     ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
    584                                             opts.filter, opts.state_table);
    585     return CreateBase2(fst1, fst2, nopts);
    586   }
    587 
    588   // Create compose implementation specifying no matcher type.
    589   static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
    590                           const CacheOptions &opts) {
    591     switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
    592       default:
    593       case MATCH_NONE: {     // Default composition (no look-ahead)
    594         VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
    595         ComposeFstOptions<Arc> nopts(opts);
    596         return CreateBase1(fst1, fst2, nopts);
    597       }
    598       case MATCH_OUTPUT: {   // Lookahead on fst1
    599         VLOG(2) << "ComposeFst: Lookahead on fst1";
    600         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
    601         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
    602         ComposeFstOptions<Arc, M, F> nopts(opts);
    603         return CreateBase1(fst1, fst2, nopts);
    604       }
    605       case MATCH_INPUT: {    // Lookahead on fst2
    606         VLOG(2) << "ComposeFst: Lookahead on fst2";
    607         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
    608         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
    609         ComposeFstOptions<Arc, M, F> nopts(opts);
    610         return CreateBase1(fst1, fst2, nopts);
    611       }
    612     }
    613   }
    614 
    615  private:
    616   // Makes visible to friends.
    617   Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
    618 
    619   void operator=(const ComposeFst<A> &fst);  // disallow
    620 };
    621 
    622 
    623 // Specialization for ComposeFst.
    624 template<class A>
    625 class StateIterator< ComposeFst<A> >
    626     : public CacheStateIterator< ComposeFst<A> > {
    627  public:
    628   explicit StateIterator(const ComposeFst<A> &fst)
    629       : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
    630 };
    631 
    632 
    633 // Specialization for ComposeFst.
    634 template <class A>
    635 class ArcIterator< ComposeFst<A> >
    636     : public CacheArcIterator< ComposeFst<A> > {
    637  public:
    638   typedef typename A::StateId StateId;
    639 
    640   ArcIterator(const ComposeFst<A> &fst, StateId s)
    641       : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
    642     if (!fst.GetImpl()->HasArcs(s))
    643       fst.GetImpl()->Expand(s);
    644   }
    645 
    646  private:
    647   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
    648 };
    649 
    650 template <class A> inline
    651 void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
    652   data->base = new StateIterator< ComposeFst<A> >(*this);
    653 }
    654 
    655 // Useful alias when using StdArc.
    656 typedef ComposeFst<StdArc> StdComposeFst;
    657 
    658 enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER,
    659                      MATCH_FILTER };
    660 
    661 struct ComposeOptions {
    662   bool connect;  // Connect output
    663   ComposeFilter filter_type;  // Which pre-defined filter to use
    664 
    665   ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
    666       : connect(c), filter_type(ft) {}
    667   ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
    668 };
    669 
    670 // Computes the composition of two transducers. This version writes
    671 // the composed FST into a MurableFst. If FST1 transduces string x to
    672 // y with weight a and FST2 transduces y to z with weight b, then
    673 // their composition transduces string x to z with weight
    674 // Times(x, z).
    675 //
    676 // The output labels of the first transducer or the input labels of
    677 // the second transducer must be sorted.  The weights need to form a
    678 // commutative semiring (valid for TropicalWeight and LogWeight).
    679 //
    680 // Complexity:
    681 // Assuming the first FST is unsorted and the second is sorted:
    682 // - Time: O(V1 V2 D1 (log D2 + M2)),
    683 // - Space: O(V1 V2 D1 M2)
    684 // where Vi = # of states, Di = maximum out-degree, and Mi is
    685 // the maximum multiplicity for the ith FST.
    686 //
    687 // Caveats:
    688 // - Compose trims its output.
    689 // - The efficiency of composition can be strongly affected by several factors:
    690 //   - the choice of which tnansducer is sorted - prefer sorting the FST
    691 //     that has the greater average out-degree.
    692 //   - the amount of non-determinism
    693 //   - the presence and location of epsilon transitions - avoid epsilon
    694 //     transitions on the output side of the first transducer or
    695 //     the input side of the second transducer or prefer placing
    696 //     them later in a path since they delay matching and can
    697 //     introduce non-coaccessible states and transitions.
    698 template<class Arc>
    699 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
    700              MutableFst<Arc> *ofst,
    701              const ComposeOptions &opts = ComposeOptions()) {
    702   typedef Matcher< Fst<Arc> > M;
    703 
    704   if (opts.filter_type == AUTO_FILTER) {
    705      CacheOptions nopts;
    706      nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
    707      *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
    708   } else if (opts.filter_type == SEQUENCE_FILTER) {
    709     ComposeFstOptions<Arc> copts;
    710     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
    711     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
    712   } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
    713     ComposeFstOptions<Arc, M,  AltSequenceComposeFilter<M> > copts;
    714     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
    715     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
    716   } else if (opts.filter_type == MATCH_FILTER) {
    717     ComposeFstOptions<Arc, M,  MatchComposeFilter<M> > copts;
    718     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
    719     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
    720   }
    721 
    722   if (opts.connect)
    723     Connect(ofst);
    724 }
    725 
    726 }  // namespace fst
    727 
    728 #endif  // FST_LIB_COMPOSE_H__
    729