Home | History | Annotate | Download | only in lib
      1 // determinize.h
      2 
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 //
     17 // \file
     18 // Functions and classes to determinize an FST.
     19 
     20 #ifndef FST_LIB_DETERMINIZE_H__
     21 #define FST_LIB_DETERMINIZE_H__
     22 
     23 #include <algorithm>
     24 #include <map>
     25 
     26 #include <ext/hash_map>
     27 using __gnu_cxx::hash_map;
     28 #include <ext/slist>
     29 using __gnu_cxx::slist;
     30 
     31 #include "fst/lib/cache.h"
     32 #include "fst/lib/factor-weight.h"
     33 #include "fst/lib/map.h"
     34 #include "fst/lib/test-properties.h"
     35 
     36 namespace fst {
     37 
     38 //
     39 // COMMON DIVISORS - these are used in determinization to compute
     40 // the transition weights. In the simplest case, it is just the same
     41 // as the semiring Plus(). However, other choices permit more efficient
     42 // determinization when the output contains strings.
     43 //
     44 
     45 // The default common divisor uses the semiring Plus.
     46 template <class W>
     47 class DefaultCommonDivisor {
     48  public:
     49   typedef W Weight;
     50 
     51   W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); }
     52 };
     53 
     54 
     55 // The label common divisor for a (left) string semiring selects a
     56 // single letter common prefix or the empty string. This is used in
     57 // the determinization of output strings so that at most a single
     58 // letter will appear in the output of a transtion.
     59 template <typename L, StringType S>
     60 class LabelCommonDivisor {
     61  public:
     62   typedef StringWeight<L, S> Weight;
     63 
     64   Weight operator()(const Weight &w1, const Weight &w2) const {
     65     StringWeightIterator<L, S> iter1(w1);
     66     StringWeightIterator<L, S> iter2(w2);
     67 
     68     if (!(StringWeight<L, S>::Properties() & kLeftSemiring))
     69       LOG(FATAL) << "LabelCommonDivisor: Weight needs to be left semiring";
     70 
     71     if (w1.Size() == 0 || w2.Size() == 0)
     72       return Weight::One();
     73     else if (w1 == Weight::Zero())
     74       return Weight(iter2.Value());
     75     else if (w2 == Weight::Zero())
     76       return Weight(iter1.Value());
     77     else if (iter1.Value() == iter2.Value())
     78       return Weight(iter1.Value());
     79     else
     80       return Weight::One();
     81   }
     82 };
     83 
     84 
     85 // The gallic common divisor uses the label common divisor on the
     86 // string component and the template argument D common divisor on the
     87 // weight component, which defaults to the default common divisor.
     88 template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
     89 class GallicCommonDivisor {
     90  public:
     91   typedef GallicWeight<L, W, S> Weight;
     92 
     93   Weight operator()(const Weight &w1, const Weight &w2) const {
     94     return Weight(label_common_divisor_(w1.Value1(), w2.Value1()),
     95                   weight_common_divisor_(w1.Value2(), w2.Value2()));
     96   }
     97 
     98  private:
     99   LabelCommonDivisor<L, S> label_common_divisor_;
    100   D weight_common_divisor_;
    101 };
    102 
    103 // Options for finite-state transducer determinization.
    104 struct DeterminizeFstOptions : CacheOptions {
    105   float delta;  // Quantization delta for subset weights
    106 
    107   explicit DeterminizeFstOptions(const CacheOptions &opts, float del = kDelta)
    108       : CacheOptions(opts), delta(del) {}
    109 
    110   explicit DeterminizeFstOptions(float del = kDelta) : delta(del) {}
    111 };
    112 
    113 
    114 // Implementation of delayed DeterminizeFst. This base class is
    115 // common to the variants that implement acceptor and transducer
    116 // determinization.
    117 template <class A>
    118 class DeterminizeFstImplBase : public CacheImpl<A> {
    119  public:
    120   using FstImpl<A>::SetType;
    121   using FstImpl<A>::SetProperties;
    122   using FstImpl<A>::Properties;
    123   using FstImpl<A>::SetInputSymbols;
    124   using FstImpl<A>::SetOutputSymbols;
    125 
    126   using CacheBaseImpl< CacheState<A> >::HasStart;
    127   using CacheBaseImpl< CacheState<A> >::HasFinal;
    128   using CacheBaseImpl< CacheState<A> >::HasArcs;
    129 
    130   typedef typename A::Label Label;
    131   typedef typename A::Weight Weight;
    132   typedef typename A::StateId StateId;
    133   typedef CacheState<A> State;
    134 
    135   DeterminizeFstImplBase(const Fst<A> &fst, const CacheOptions &opts)
    136       : CacheImpl<A>(opts), fst_(fst.Copy()) {
    137     SetType("determinize");
    138     uint64 props = fst.Properties(kFstProperties, false);
    139     SetProperties(DeterminizeProperties(props), kCopyProperties);
    140 
    141     SetInputSymbols(fst.InputSymbols());
    142     SetOutputSymbols(fst.OutputSymbols());
    143   }
    144 
    145   virtual ~DeterminizeFstImplBase() { delete fst_; }
    146 
    147   StateId Start() {
    148     if (!HasStart()) {
    149       StateId start = ComputeStart();
    150       if (start != kNoStateId) {
    151         SetStart(start);
    152       }
    153     }
    154     return CacheImpl<A>::Start();
    155   }
    156 
    157   Weight Final(StateId s) {
    158     if (!HasFinal(s)) {
    159       Weight final = ComputeFinal(s);
    160       SetFinal(s, final);
    161     }
    162     return CacheImpl<A>::Final(s);
    163   }
    164 
    165   virtual void Expand(StateId s) = 0;
    166 
    167   size_t NumArcs(StateId s) {
    168     if (!HasArcs(s))
    169       Expand(s);
    170     return CacheImpl<A>::NumArcs(s);
    171   }
    172 
    173   size_t NumInputEpsilons(StateId s) {
    174     if (!HasArcs(s))
    175       Expand(s);
    176     return CacheImpl<A>::NumInputEpsilons(s);
    177   }
    178 
    179   size_t NumOutputEpsilons(StateId s) {
    180     if (!HasArcs(s))
    181       Expand(s);
    182     return CacheImpl<A>::NumOutputEpsilons(s);
    183   }
    184 
    185   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
    186     if (!HasArcs(s))
    187       Expand(s);
    188     CacheImpl<A>::InitArcIterator(s, data);
    189   }
    190 
    191   virtual StateId ComputeStart() = 0;
    192 
    193   virtual Weight ComputeFinal(StateId s) = 0;
    194 
    195  protected:
    196   const Fst<A> *fst_;            // Input Fst
    197 
    198   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImplBase);
    199 };
    200 
    201 
    202 // Implementation of delayed determinization for weighted acceptors.
    203 // It is templated on the arc type A and the common divisor C.
    204 template <class A, class C>
    205 class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> {
    206  public:
    207   using DeterminizeFstImplBase<A>::fst_;
    208 
    209   typedef typename A::Label Label;
    210   typedef typename A::Weight Weight;
    211   typedef typename A::StateId StateId;
    212 
    213   struct Element {
    214     Element() {}
    215 
    216     Element(StateId s, Weight w) : state_id(s), weight(w) {}
    217 
    218     StateId state_id;  // Input state Id
    219     Weight weight;     // Residual weight
    220   };
    221   typedef slist<Element> Subset;
    222   typedef map<Label, Subset*> LabelMap;
    223 
    224   DeterminizeFsaImpl(const Fst<A> &fst, C common_divisor,
    225                      const DeterminizeFstOptions &opts)
    226       : DeterminizeFstImplBase<A>(fst, opts),
    227         delta_(opts.delta), common_divisor_(common_divisor),
    228         subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
    229     if (!fst.Properties(kAcceptor, true))
    230      LOG(FATAL)  << "DeterminizeFst: argument not an acceptor";
    231   if (!(Weight::Properties() & kLeftSemiring))
    232     LOG(FATAL) << "DeterminizeFst: Weight needs to be left distributive: "
    233                << Weight::Type();
    234   }
    235 
    236   virtual ~DeterminizeFsaImpl() {
    237     for (unsigned int i = 0; i < subsets_.size(); ++i)
    238       delete subsets_[i];
    239   }
    240 
    241   virtual StateId ComputeStart() {
    242     StateId s = fst_->Start();
    243     if (s == kNoStateId)
    244       return kNoStateId;
    245     Element element(s, Weight::One());
    246     Subset *subset = new Subset;
    247     subset->push_front(element);
    248     return FindState(subset);
    249   }
    250 
    251   virtual Weight ComputeFinal(StateId s) {
    252     Subset *subset = subsets_[s];
    253     Weight final = Weight::Zero();
    254     for (typename Subset::iterator siter = subset->begin();
    255          siter != subset->end();
    256          ++siter) {
    257       Element &element = *siter;
    258       final = Plus(final, Times(element.weight,
    259                                 fst_->Final(element.state_id)));
    260       }
    261     return final;
    262   }
    263 
    264   // Finds the state corresponding to a subset. Only creates a new state
    265   // if the subset is not found in the subset hash. FindState takes
    266   // ownership of the subset argument (so that it doesn't have to copy it
    267   // if it creates a new state).
    268   //
    269   // The method exploits the following device: all pairs stored in the
    270   // associative container subset_hash_ are of the form (subset,
    271   // id(subset) + 1), i.e. subset_hash_[subset] > 0 if subset has been
    272   // stored previously. For unassigned subsets, the call to
    273   // subset_hash_[subset] creates a new pair (subset, 0). As a result,
    274   // subset_hash_[subset] == 0 iff subset is new.
    275   StateId FindState(Subset *subset) {
    276     StateId &assoc_value = subset_hash_[subset];
    277     if (assoc_value == 0) {  // subset wasn't present; assign it a new ID
    278       subsets_.push_back(subset);
    279       assoc_value = subsets_.size();
    280     } else {
    281       delete subset;
    282     }
    283     return assoc_value - 1;  // NB: assoc_value = ID + 1
    284   }
    285 
    286   // Computes the outgoing transitions from a state, creating new destination
    287   // states as needed.
    288   virtual void Expand(StateId s) {
    289 
    290     LabelMap label_map;
    291     LabelSubsets(s, &label_map);
    292 
    293     for (typename LabelMap::iterator liter = label_map.begin();
    294          liter != label_map.end();
    295          ++liter)
    296       AddArc(s, liter->first, liter->second);
    297     SetArcs(s);
    298   }
    299 
    300  private:
    301   // Constructs destination subsets per label. At return, subset
    302   // element weights include the input automaton label weights and the
    303   // subsets may contain duplicate states.
    304   void LabelSubsets(StateId s, LabelMap *label_map) {
    305     Subset *src_subset = subsets_[s];
    306 
    307     for (typename Subset::iterator siter = src_subset->begin();
    308          siter != src_subset->end();
    309          ++siter) {
    310       Element &src_element = *siter;
    311       for (ArcIterator< Fst<A> > aiter(*fst_, src_element.state_id);
    312            !aiter.Done();
    313            aiter.Next()) {
    314         const A &arc = aiter.Value();
    315         Element dest_element(arc.nextstate,
    316                              Times(src_element.weight, arc.weight));
    317         Subset* &dest_subset = (*label_map)[arc.ilabel];
    318         if (dest_subset == 0)
    319           dest_subset = new Subset;
    320         dest_subset->push_front(dest_element);
    321       }
    322     }
    323   }
    324 
    325   // Adds an arc from state S to the destination state associated
    326   // with subset DEST_SUBSET (as created by LabelSubsets).
    327   void AddArc(StateId s, Label label, Subset *dest_subset) {
    328     A arc;
    329     arc.ilabel = label;
    330     arc.olabel = label;
    331     arc.weight = Weight::Zero();
    332 
    333     typename Subset::iterator oiter;
    334     for (typename Subset::iterator diter = dest_subset->begin();
    335          diter != dest_subset->end();) {
    336       Element &dest_element = *diter;
    337       // Computes label weight.
    338       arc.weight = common_divisor_(arc.weight, dest_element.weight);
    339 
    340       while ((StateId)elements_.size() <= dest_element.state_id)
    341         elements_.push_back(0);
    342       Element *matching_element = elements_[dest_element.state_id];
    343       if (matching_element) {
    344         // Found duplicate state: sums state weight and deletes dup.
    345         matching_element->weight = Plus(matching_element->weight,
    346                                         dest_element.weight);
    347         ++diter;
    348         dest_subset->erase_after(oiter);
    349       } else {
    350         // Saves element so we can check for duplicate for this state.
    351         elements_[dest_element.state_id] = &dest_element;
    352         oiter = diter;
    353         ++diter;
    354       }
    355     }
    356 
    357     // Divides out label weight from destination subset elements.
    358     // Quantizes to ensure comparisons are effective.
    359     // Clears element vector.
    360     for (typename Subset::iterator diter = dest_subset->begin();
    361          diter != dest_subset->end();
    362          ++diter) {
    363       Element &dest_element = *diter;
    364       dest_element.weight = Divide(dest_element.weight, arc.weight,
    365                                    DIVIDE_LEFT);
    366       dest_element.weight = dest_element.weight.Quantize(delta_);
    367       elements_[dest_element.state_id] = 0;
    368     }
    369 
    370     arc.nextstate = FindState(dest_subset);
    371     CacheImpl<A>::AddArc(s, arc);
    372   }
    373 
    374   // Comparison object for hashing Subset(s). Subsets are not sorted in this
    375   // implementation, so ordering must not be assumed in the equivalence
    376   // test.
    377   class SubsetEqual {
    378    public:
    379     // Constructor takes vector needed to check equality. See immediately
    380     // below for constraints on it.
    381     explicit SubsetEqual(vector<Element *> *elements)
    382         : elements_(elements) {}
    383 
    384     // At each call to operator(), elements_[state] must be defined and
    385     // NULL for each state in the subset arguments. When this operator
    386     // returns, elements_ will preserve that property. We keep it
    387     // full of NULLs so that it is ready for the next call.
    388     bool operator()(Subset* subset1, Subset* subset2) const {
    389         if (subset1->size() != subset2->size())
    390           return false;
    391 
    392       // Loads first subset elements in element vector.
    393       for (typename Subset::iterator iter1 = subset1->begin();
    394            iter1 != subset1->end();
    395            ++iter1) {
    396         Element &element1 = *iter1;
    397         (*elements_)[element1.state_id] = &element1;
    398       }
    399 
    400       // Checks second subset matches first via element vector.
    401       for (typename Subset::iterator iter2 = subset2->begin();
    402            iter2 != subset2->end();
    403            ++iter2) {
    404         Element &element2 = *iter2;
    405         Element *element1 = (*elements_)[element2.state_id];
    406         if (!element1 || element1->weight != element2.weight) {
    407           // Mismatch found. Resets element vector before returning false.
    408           for (typename Subset::iterator iter1 = subset1->begin();
    409                iter1 != subset1->end();
    410                ++iter1)
    411             (*elements_)[iter1->state_id] = 0;
    412           return false;
    413         } else {
    414           (*elements_)[element2.state_id] = 0;  // Clears entry
    415         }
    416       }
    417       return true;
    418     }
    419    private:
    420     vector<Element *> *elements_;
    421   };
    422 
    423   // Hash function for Subset to Fst states. Subset elements are not
    424   // sorted in this implementation, so the hash must be invariant
    425   // under subset reordering.
    426   class SubsetKey {
    427    public:
    428     size_t operator()(const Subset* subset) const {
    429       size_t hash = 0;
    430       for (typename Subset::const_iterator iter = subset->begin();
    431            iter != subset->end();
    432            ++iter) {
    433         const Element &element = *iter;
    434         int lshift = element.state_id % kPrime;
    435         int rshift = sizeof(size_t) - lshift;
    436         hash ^= element.state_id << lshift ^
    437                 element.state_id >> rshift ^
    438                 element.weight.Hash();
    439       }
    440       return hash;
    441     }
    442 
    443    private:
    444     static const int kPrime = sizeof(size_t) == 8 ? 23 : 13;
    445   };
    446 
    447   float delta_;                  // Quantization delta for subset weights
    448   C common_divisor_;
    449 
    450   // Used to test equivalence of subsets.
    451   vector<Element *> elements_;
    452 
    453   // Maps from StateId to Subset.
    454   vector<Subset *> subsets_;
    455 
    456   // Hashes from Subset to its StateId in the output automaton.
    457   typedef hash_map<Subset *, StateId, SubsetKey, SubsetEqual>
    458   SubsetHash;
    459 
    460   // Hashes from Label to Subsets corr. to destination states of current state.
    461   SubsetHash subset_hash_;
    462 
    463   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFsaImpl);
    464 };
    465 
    466 
    467 // Implementation of delayed determinization for transducers.
    468 // Transducer determinization is implemented by mapping the input to
    469 // the Gallic semiring as an acceptor whose weights contain the output
    470 // strings and using acceptor determinization above to determinize
    471 // that acceptor.
    472 template <class A, StringType S>
    473 class DeterminizeFstImpl : public DeterminizeFstImplBase<A> {
    474  public:
    475   typedef typename A::Label Label;
    476   typedef typename A::Weight Weight;
    477   typedef typename A::StateId StateId;
    478 
    479   typedef ToGallicMapper<A, S> ToMapper;
    480   typedef FromGallicMapper<A, S> FromMapper;
    481 
    482   typedef typename ToMapper::ToArc ToArc;
    483   typedef MapFst<A, ToArc, ToMapper> ToFst;
    484   typedef MapFst<ToArc, A, FromMapper> FromFst;
    485 
    486   typedef GallicCommonDivisor<Label, Weight, S> CommonDivisor;
    487   typedef GallicFactor<Label, Weight, S> FactorIterator;
    488 
    489   // Defined after DeterminizeFst since it calls it.
    490   DeterminizeFstImpl(const Fst<A> &fst, const DeterminizeFstOptions &opts);
    491 
    492   ~DeterminizeFstImpl() { delete from_fst_; }
    493 
    494   virtual StateId ComputeStart() { return from_fst_->Start(); }
    495 
    496   virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); }
    497 
    498   virtual void Expand(StateId s) {
    499     for (ArcIterator<FromFst> aiter(*from_fst_, s);
    500          !aiter.Done();
    501          aiter.Next())
    502       CacheImpl<A>::AddArc(s, aiter.Value());
    503     CacheImpl<A>::SetArcs(s);
    504   }
    505 
    506  private:
    507   FromFst *from_fst_;
    508 
    509   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImpl);
    510 };
    511 
    512 
    513 // Determinizes a weighted transducer. This version is a delayed
    514 // Fst. The result will be an equivalent FST that has the property
    515 // that no state has two transitions with the same input label.
    516 // For this algorithm, epsilon transitions are treated as regular
    517 // symbols (cf. RmEpsilon).
    518 //
    519 // The transducer must be functional. The weights must be (weakly)
    520 // left divisible (valid for TropicalWeight and LogWeight).
    521 //
    522 // Complexity:
    523 // - Determinizable: exponential (polynomial in the size of the output)
    524 // - Non-determinizable) does not terminate
    525 //
    526 // The determinizable automata include all unweighted and all acyclic input.
    527 //
    528 // References:
    529 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech
    530 //   Processing". Computational Linguistics, 23:2, 1997.
    531 template <class A>
    532 class DeterminizeFst : public Fst<A> {
    533  public:
    534   friend class ArcIterator< DeterminizeFst<A> >;
    535   friend class CacheStateIterator< DeterminizeFst<A> >;
    536   friend class CacheArcIterator< DeterminizeFst<A> >;
    537   template <class B, StringType S> friend class DeterminizeFstImpl;
    538 
    539   typedef A Arc;
    540   typedef typename A::Weight Weight;
    541   typedef typename A::StateId StateId;
    542   typedef typename A::Label Label;
    543   typedef CacheState<A> State;
    544 
    545   explicit DeterminizeFst(const Fst<A> &fst,
    546                  const DeterminizeFstOptions &opts = DeterminizeFstOptions()) {
    547     if (fst.Properties(kAcceptor, true)) {
    548       // Calls implementation for acceptors.
    549       typedef DefaultCommonDivisor<Weight> D;
    550       impl_ = new DeterminizeFsaImpl<A, D>(fst, D(), opts);
    551     } else {
    552       // Calls implementation for transducers.
    553       impl_ = new DeterminizeFstImpl<A, STRING_LEFT_RESTRICT>(fst, opts);
    554     }
    555   }
    556 
    557   DeterminizeFst(const DeterminizeFst<A> &fst) : Fst<A>(fst), impl_(fst.impl_) {
    558     impl_->IncrRefCount();
    559   }
    560 
    561   virtual ~DeterminizeFst() { if (!impl_->DecrRefCount()) delete impl_; }
    562 
    563   virtual StateId Start() const { return impl_->Start(); }
    564 
    565   virtual Weight Final(StateId s) const { return impl_->Final(s); }
    566 
    567   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
    568 
    569   virtual size_t NumInputEpsilons(StateId s) const {
    570     return impl_->NumInputEpsilons(s);
    571   }
    572 
    573   virtual size_t NumOutputEpsilons(StateId s) const {
    574     return impl_->NumOutputEpsilons(s);
    575   }
    576 
    577   virtual uint64 Properties(uint64 mask, bool test) const {
    578     if (test) {
    579       uint64 known, test = TestProperties(*this, mask, &known);
    580       impl_->SetProperties(test, known);
    581       return test & mask;
    582     } else {
    583       return impl_->Properties(mask);
    584     }
    585   }
    586 
    587   virtual const string& Type() const { return impl_->Type(); }
    588 
    589   virtual DeterminizeFst<A> *Copy() const {
    590     return new DeterminizeFst<A>(*this);
    591   }
    592 
    593   virtual const SymbolTable* InputSymbols() const {
    594     return impl_->InputSymbols();
    595   }
    596 
    597   virtual const SymbolTable* OutputSymbols() const {
    598     return impl_->OutputSymbols();
    599   }
    600 
    601   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
    602 
    603   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
    604     impl_->InitArcIterator(s, data);
    605   }
    606 
    607  protected:
    608   DeterminizeFstImplBase<A> *Impl() { return impl_; }
    609 
    610  private:
    611   // This private version is for passing the common divisor to
    612   // FSA determinization.
    613   template <class D>
    614   DeterminizeFst(const Fst<A> &fst, const D &common_divisor,
    615                  const DeterminizeFstOptions &opts)
    616       :  impl_(new DeterminizeFsaImpl<A, D>(fst, common_divisor, opts)) {}
    617 
    618   DeterminizeFstImplBase<A> *impl_;
    619 
    620   void operator=(const DeterminizeFst<A> &fst);  // Disallow
    621 };
    622 
    623 
    624 template <class A, StringType S>
    625 DeterminizeFstImpl<A, S>::DeterminizeFstImpl(
    626     const Fst<A> &fst, const DeterminizeFstOptions &opts)
    627     : DeterminizeFstImplBase<A>(fst, opts) {
    628 
    629   // Mapper to an acceptor.
    630   ToFst to_fst(fst, ToMapper());
    631 
    632   // Determinize acceptor.
    633   // This recursive call terminates since it passes the common divisor
    634   // to a private constructor.
    635   DeterminizeFst<ToArc> det_fsa(to_fst, CommonDivisor(), opts);
    636 
    637   // Mapper back to transducer.
    638   FactorWeightOptions fopts(CacheOptions(true, 0), opts.delta, true);
    639   FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts);
    640   from_fst_ = new FromFst(factored_fst, FromMapper());
    641 }
    642 
    643 
    644 // Specialization for DeterminizeFst.
    645 template <class A>
    646 class StateIterator< DeterminizeFst<A> >
    647     : public CacheStateIterator< DeterminizeFst<A> > {
    648  public:
    649   explicit StateIterator(const DeterminizeFst<A> &fst)
    650       : CacheStateIterator< DeterminizeFst<A> >(fst) {}
    651 };
    652 
    653 
    654 // Specialization for DeterminizeFst.
    655 template <class A>
    656 class ArcIterator< DeterminizeFst<A> >
    657     : public CacheArcIterator< DeterminizeFst<A> > {
    658  public:
    659   typedef typename A::StateId StateId;
    660 
    661   ArcIterator(const DeterminizeFst<A> &fst, StateId s)
    662       : CacheArcIterator< DeterminizeFst<A> >(fst, s) {
    663     if (!fst.impl_->HasArcs(s))
    664       fst.impl_->Expand(s);
    665   }
    666 
    667  private:
    668   DISALLOW_EVIL_CONSTRUCTORS(ArcIterator);
    669 };
    670 
    671 
    672 template <class A> inline
    673 void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
    674 {
    675   data->base = new StateIterator< DeterminizeFst<A> >(*this);
    676 }
    677 
    678 
    679 // Useful aliases when using StdArc.
    680 typedef DeterminizeFst<StdArc> StdDeterminizeFst;
    681 
    682 
    683 struct DeterminizeOptions {
    684   float delta;                   // Quantization delta for subset weights
    685 
    686   explicit DeterminizeOptions(float d) : delta(d) {}
    687   DeterminizeOptions() :delta(kDelta) {}
    688 };
    689 
    690 
    691 // Determinizes a weighted transducer.  This version writes the
    692 // determinized Fst to an output MutableFst.  The result will be an
    693 // equivalent FSt that has the property that no state has two
    694 // transitions with the same input label.  For this algorithm, epsilon
    695 // transitions are treated as regular symbols (cf. RmEpsilon).
    696 //
    697 // The transducer must be functional. The weights must be (weakly)
    698 // left divisible (valid for TropicalWeight and LogWeight).
    699 //
    700 // Complexity:
    701 // - Determinizable: exponential (polynomial in the size of the output)
    702 // - Non-determinizable: does not terminate
    703 //
    704 // The determinizable automata include all unweighted and all acyclic input.
    705 //
    706 // References:
    707 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech
    708 //   Processing". Computational Linguistics, 23:2, 1997.
    709 template <class Arc>
    710 void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
    711              const DeterminizeOptions &opts = DeterminizeOptions()) {
    712   DeterminizeFstOptions nopts;
    713   nopts.delta = opts.delta;
    714   nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
    715   *ofst = DeterminizeFst<Arc>(ifst, nopts);
    716 }
    717 
    718 
    719 }  // namespace fst
    720 
    721 #endif  // FST_LIB_DETERMINIZE_H__
    722