Home | History | Annotate | Download | only in lib
      1 // determinize.h
      2 
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 //
     17 // \file
     18 // Functions and classes to determinize an FST.
     19 
     20 #ifndef FST_LIB_DETERMINIZE_H__
     21 #define FST_LIB_DETERMINIZE_H__
     22 
     23 #include <algorithm>
     24 #include <map>
     25 
     26 #include <unordered_map>
     27 #include <forward_list>
     28 
     29 #include "fst/lib/cache.h"
     30 #include "fst/lib/factor-weight.h"
     31 #include "fst/lib/map.h"
     32 #include "fst/lib/test-properties.h"
     33 
     34 namespace fst {
     35 
     36 //
     37 // COMMON DIVISORS - these are used in determinization to compute
     38 // the transition weights. In the simplest case, it is just the same
     39 // as the semiring Plus(). However, other choices permit more efficient
     40 // determinization when the output contains strings.
     41 //
     42 
     43 // The default common divisor uses the semiring Plus.
     44 template <class W>
     45 class DefaultCommonDivisor {
     46  public:
     47   typedef W Weight;
     48 
     49   W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); }
     50 };
     51 
     52 
     53 // The label common divisor for a (left) string semiring selects a
     54 // single letter common prefix or the empty string. This is used in
     55 // the determinization of output strings so that at most a single
     56 // letter will appear in the output of a transtion.
     57 template <typename L, StringType S>
     58 class LabelCommonDivisor {
     59  public:
     60   typedef StringWeight<L, S> Weight;
     61 
     62   Weight operator()(const Weight &w1, const Weight &w2) const {
     63     StringWeightIterator<L, S> iter1(w1);
     64     StringWeightIterator<L, S> iter2(w2);
     65 
     66     if (!(StringWeight<L, S>::Properties() & kLeftSemiring))
     67       LOG(FATAL) << "LabelCommonDivisor: Weight needs to be left semiring";
     68 
     69     if (w1.Size() == 0 || w2.Size() == 0)
     70       return Weight::One();
     71     else if (w1 == Weight::Zero())
     72       return Weight(iter2.Value());
     73     else if (w2 == Weight::Zero())
     74       return Weight(iter1.Value());
     75     else if (iter1.Value() == iter2.Value())
     76       return Weight(iter1.Value());
     77     else
     78       return Weight::One();
     79   }
     80 };
     81 
     82 
     83 // The gallic common divisor uses the label common divisor on the
     84 // string component and the template argument D common divisor on the
     85 // weight component, which defaults to the default common divisor.
     86 template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
     87 class GallicCommonDivisor {
     88  public:
     89   typedef GallicWeight<L, W, S> Weight;
     90 
     91   Weight operator()(const Weight &w1, const Weight &w2) const {
     92     return Weight(label_common_divisor_(w1.Value1(), w2.Value1()),
     93                   weight_common_divisor_(w1.Value2(), w2.Value2()));
     94   }
     95 
     96  private:
     97   LabelCommonDivisor<L, S> label_common_divisor_;
     98   D weight_common_divisor_;
     99 };
    100 
    101 // Options for finite-state transducer determinization.
    102 struct DeterminizeFstOptions : CacheOptions {
    103   float delta;  // Quantization delta for subset weights
    104 
    105   explicit DeterminizeFstOptions(const CacheOptions &opts, float del = kDelta)
    106       : CacheOptions(opts), delta(del) {}
    107 
    108   explicit DeterminizeFstOptions(float del = kDelta) : delta(del) {}
    109 };
    110 
    111 
    112 // Implementation of delayed DeterminizeFst. This base class is
    113 // common to the variants that implement acceptor and transducer
    114 // determinization.
    115 template <class A>
    116 class DeterminizeFstImplBase : public CacheImpl<A> {
    117  public:
    118   using FstImpl<A>::SetType;
    119   using FstImpl<A>::SetProperties;
    120   using FstImpl<A>::Properties;
    121   using FstImpl<A>::SetInputSymbols;
    122   using FstImpl<A>::SetOutputSymbols;
    123 
    124   using CacheBaseImpl< CacheState<A> >::HasStart;
    125   using CacheBaseImpl< CacheState<A> >::HasFinal;
    126   using CacheBaseImpl< CacheState<A> >::HasArcs;
    127 
    128   typedef typename A::Label Label;
    129   typedef typename A::Weight Weight;
    130   typedef typename A::StateId StateId;
    131   typedef CacheState<A> State;
    132 
    133   DeterminizeFstImplBase(const Fst<A> &fst, const CacheOptions &opts)
    134       : CacheImpl<A>(opts), fst_(fst.Copy()) {
    135     SetType("determinize");
    136     uint64 props = fst.Properties(kFstProperties, false);
    137     SetProperties(DeterminizeProperties(props), kCopyProperties);
    138 
    139     SetInputSymbols(fst.InputSymbols());
    140     SetOutputSymbols(fst.OutputSymbols());
    141   }
    142 
    143   virtual ~DeterminizeFstImplBase() { delete fst_; }
    144 
    145   StateId Start() {
    146     if (!HasStart()) {
    147       StateId start = ComputeStart();
    148       if (start != kNoStateId) {
    149         this->SetStart(start);
    150       }
    151     }
    152     return CacheImpl<A>::Start();
    153   }
    154 
    155   Weight Final(StateId s) {
    156     if (!HasFinal(s)) {
    157       Weight final = ComputeFinal(s);
    158       this->SetFinal(s, final);
    159     }
    160     return CacheImpl<A>::Final(s);
    161   }
    162 
    163   virtual void Expand(StateId s) = 0;
    164 
    165   size_t NumArcs(StateId s) {
    166     if (!HasArcs(s))
    167       Expand(s);
    168     return CacheImpl<A>::NumArcs(s);
    169   }
    170 
    171   size_t NumInputEpsilons(StateId s) {
    172     if (!HasArcs(s))
    173       Expand(s);
    174     return CacheImpl<A>::NumInputEpsilons(s);
    175   }
    176 
    177   size_t NumOutputEpsilons(StateId s) {
    178     if (!HasArcs(s))
    179       Expand(s);
    180     return CacheImpl<A>::NumOutputEpsilons(s);
    181   }
    182 
    183   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
    184     if (!HasArcs(s))
    185       Expand(s);
    186     CacheImpl<A>::InitArcIterator(s, data);
    187   }
    188 
    189   virtual StateId ComputeStart() = 0;
    190 
    191   virtual Weight ComputeFinal(StateId s) = 0;
    192 
    193  protected:
    194   const Fst<A> *fst_;            // Input Fst
    195 
    196   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImplBase);
    197 };
    198 
    199 
    200 // Implementation of delayed determinization for weighted acceptors.
    201 // It is templated on the arc type A and the common divisor C.
    202 template <class A, class C>
    203 class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> {
    204  public:
    205   using DeterminizeFstImplBase<A>::fst_;
    206 
    207   typedef typename A::Label Label;
    208   typedef typename A::Weight Weight;
    209   typedef typename A::StateId StateId;
    210 
    211   struct Element {
    212     Element() {}
    213 
    214     Element(StateId s, Weight w) : state_id(s), weight(w) {}
    215 
    216     StateId state_id;  // Input state Id
    217     Weight weight;     // Residual weight
    218   };
    219   typedef std::forward_list<Element> Subset;
    220   typedef map<Label, Subset*> LabelMap;
    221 
    222   DeterminizeFsaImpl(const Fst<A> &fst, C common_divisor,
    223                      const DeterminizeFstOptions &opts)
    224       : DeterminizeFstImplBase<A>(fst, opts),
    225         delta_(opts.delta), common_divisor_(common_divisor),
    226         subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) {
    227     if (!fst.Properties(kAcceptor, true))
    228      LOG(FATAL)  << "DeterminizeFst: argument not an acceptor";
    229   if (!(Weight::Properties() & kLeftSemiring))
    230     LOG(FATAL) << "DeterminizeFst: Weight needs to be left distributive: "
    231                << Weight::Type();
    232   }
    233 
    234   virtual ~DeterminizeFsaImpl() {
    235     for (unsigned int i = 0; i < subsets_.size(); ++i)
    236       delete subsets_[i];
    237   }
    238 
    239   virtual StateId ComputeStart() {
    240     StateId s = fst_->Start();
    241     if (s == kNoStateId)
    242       return kNoStateId;
    243     Element element(s, Weight::One());
    244     Subset *subset = new Subset;
    245     subset->push_front(element);
    246     return FindState(subset);
    247   }
    248 
    249   virtual Weight ComputeFinal(StateId s) {
    250     Subset *subset = subsets_[s];
    251     Weight final = Weight::Zero();
    252     for (typename Subset::iterator siter = subset->begin();
    253          siter != subset->end();
    254          ++siter) {
    255       Element &element = *siter;
    256       final = Plus(final, Times(element.weight,
    257                                 fst_->Final(element.state_id)));
    258       }
    259     return final;
    260   }
    261 
    262   // Finds the state corresponding to a subset. Only creates a new state
    263   // if the subset is not found in the subset hash. FindState takes
    264   // ownership of the subset argument (so that it doesn't have to copy it
    265   // if it creates a new state).
    266   //
    267   // The method exploits the following device: all pairs stored in the
    268   // associative container subset_hash_ are of the form (subset,
    269   // id(subset) + 1), i.e. subset_hash_[subset] > 0 if subset has been
    270   // stored previously. For unassigned subsets, the call to
    271   // subset_hash_[subset] creates a new pair (subset, 0). As a result,
    272   // subset_hash_[subset] == 0 iff subset is new.
    273   StateId FindState(Subset *subset) {
    274     StateId &assoc_value = subset_hash_[subset];
    275     if (assoc_value == 0) {  // subset wasn't present; assign it a new ID
    276       subsets_.push_back(subset);
    277       assoc_value = subsets_.size();
    278     } else {
    279       delete subset;
    280     }
    281     return assoc_value - 1;  // NB: assoc_value = ID + 1
    282   }
    283 
    284   // Computes the outgoing transitions from a state, creating new destination
    285   // states as needed.
    286   virtual void Expand(StateId s) {
    287 
    288     LabelMap label_map;
    289     LabelSubsets(s, &label_map);
    290 
    291     for (typename LabelMap::iterator liter = label_map.begin();
    292          liter != label_map.end();
    293          ++liter)
    294       AddArc(s, liter->first, liter->second);
    295     this->SetArcs(s);
    296   }
    297 
    298  private:
    299   // Constructs destination subsets per label. At return, subset
    300   // element weights include the input automaton label weights and the
    301   // subsets may contain duplicate states.
    302   void LabelSubsets(StateId s, LabelMap *label_map) {
    303     Subset *src_subset = subsets_[s];
    304 
    305     for (typename Subset::iterator siter = src_subset->begin();
    306          siter != src_subset->end();
    307          ++siter) {
    308       Element &src_element = *siter;
    309       for (ArcIterator< Fst<A> > aiter(*fst_, src_element.state_id);
    310            !aiter.Done();
    311            aiter.Next()) {
    312         const A &arc = aiter.Value();
    313         Element dest_element(arc.nextstate,
    314                              Times(src_element.weight, arc.weight));
    315         Subset* &dest_subset = (*label_map)[arc.ilabel];
    316         if (dest_subset == 0)
    317           dest_subset = new Subset;
    318         dest_subset->push_front(dest_element);
    319       }
    320     }
    321   }
    322 
    323   // Adds an arc from state S to the destination state associated
    324   // with subset DEST_SUBSET (as created by LabelSubsets).
    325   void AddArc(StateId s, Label label, Subset *dest_subset) {
    326     A arc;
    327     arc.ilabel = label;
    328     arc.olabel = label;
    329     arc.weight = Weight::Zero();
    330 
    331     typename Subset::iterator oiter;
    332     for (typename Subset::iterator diter = dest_subset->begin();
    333          diter != dest_subset->end();) {
    334       Element &dest_element = *diter;
    335       // Computes label weight.
    336       arc.weight = common_divisor_(arc.weight, dest_element.weight);
    337 
    338       while ((StateId)elements_.size() <= dest_element.state_id)
    339         elements_.push_back(0);
    340       Element *matching_element = elements_[dest_element.state_id];
    341       if (matching_element) {
    342         // Found duplicate state: sums state weight and deletes dup.
    343         matching_element->weight = Plus(matching_element->weight,
    344                                         dest_element.weight);
    345         ++diter;
    346         dest_subset->erase_after(oiter);
    347       } else {
    348         // Saves element so we can check for duplicate for this state.
    349         elements_[dest_element.state_id] = &dest_element;
    350         oiter = diter;
    351         ++diter;
    352       }
    353     }
    354 
    355     // Divides out label weight from destination subset elements.
    356     // Quantizes to ensure comparisons are effective.
    357     // Clears element vector.
    358     for (typename Subset::iterator diter = dest_subset->begin();
    359          diter != dest_subset->end();
    360          ++diter) {
    361       Element &dest_element = *diter;
    362       dest_element.weight = Divide(dest_element.weight, arc.weight,
    363                                    DIVIDE_LEFT);
    364       dest_element.weight = dest_element.weight.Quantize(delta_);
    365       elements_[dest_element.state_id] = 0;
    366     }
    367 
    368     arc.nextstate = FindState(dest_subset);
    369     CacheImpl<A>::AddArc(s, arc);
    370   }
    371 
    372   // Comparison object for hashing Subset(s). Subsets are not sorted in this
    373   // implementation, so ordering must not be assumed in the equivalence
    374   // test.
    375   class SubsetEqual {
    376    public:
    377     // Constructor takes vector needed to check equality. See immediately
    378     // below for constraints on it.
    379     explicit SubsetEqual(vector<Element *> *elements)
    380         : elements_(elements) {}
    381 
    382     // At each call to operator(), elements_[state] must be defined and
    383     // NULL for each state in the subset arguments. When this operator
    384     // returns, elements_ will preserve that property. We keep it
    385     // full of NULLs so that it is ready for the next call.
    386     bool operator()(Subset* subset1, Subset* subset2) const {
    387       size_t subset1_size = std::distance(subset1->begin(), subset1->end());
    388       size_t subset2_size = std::distance(subset2->begin(), subset2->end());
    389       if (subset1_size != subset2_size)
    390         return false;
    391 
    392       // Loads first subset elements in element vector.
    393       for (typename Subset::iterator iter1 = subset1->begin();
    394            iter1 != subset1->end();
    395            ++iter1) {
    396         Element &element1 = *iter1;
    397         (*elements_)[element1.state_id] = &element1;
    398       }
    399 
    400       // Checks second subset matches first via element vector.
    401       for (typename Subset::iterator iter2 = subset2->begin();
    402            iter2 != subset2->end();
    403            ++iter2) {
    404         Element &element2 = *iter2;
    405         Element *element1 = (*elements_)[element2.state_id];
    406         if (!element1 || element1->weight != element2.weight) {
    407           // Mismatch found. Resets element vector before returning false.
    408           for (typename Subset::iterator iter1 = subset1->begin();
    409                iter1 != subset1->end();
    410                ++iter1)
    411             (*elements_)[iter1->state_id] = 0;
    412           return false;
    413         } else {
    414           (*elements_)[element2.state_id] = 0;  // Clears entry
    415         }
    416       }
    417       return true;
    418     }
    419    private:
    420     vector<Element *> *elements_;
    421   };
    422 
    423   // Hash function for Subset to Fst states. Subset elements are not
    424   // sorted in this implementation, so the hash must be invariant
    425   // under subset reordering.
    426   class SubsetKey {
    427    public:
    428     size_t operator()(const Subset* subset) const {
    429       size_t hash = 0;
    430       for (typename Subset::const_iterator iter = subset->begin();
    431            iter != subset->end();
    432            ++iter) {
    433         const Element &element = *iter;
    434         int lshift = element.state_id % kPrime;
    435         int rshift = sizeof(size_t) - lshift;
    436         hash ^= element.state_id << lshift ^
    437                 element.state_id >> rshift ^
    438                 element.weight.Hash();
    439       }
    440       return hash;
    441     }
    442 
    443    private:
    444     static const int kPrime = sizeof(size_t) == 8 ? 23 : 13;
    445   };
    446 
    447   float delta_;                  // Quantization delta for subset weights
    448   C common_divisor_;
    449 
    450   // Used to test equivalence of subsets.
    451   vector<Element *> elements_;
    452 
    453   // Maps from StateId to Subset.
    454   vector<Subset *> subsets_;
    455 
    456   // Hashes from Subset to its StateId in the output automaton.
    457   typedef std::unordered_map<Subset *, StateId, SubsetKey, SubsetEqual>
    458   SubsetHash;
    459 
    460   // Hashes from Label to Subsets corr. to destination states of current state.
    461   SubsetHash subset_hash_;
    462 
    463   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFsaImpl);
    464 };
    465 
    466 
    467 // Implementation of delayed determinization for transducers.
    468 // Transducer determinization is implemented by mapping the input to
    469 // the Gallic semiring as an acceptor whose weights contain the output
    470 // strings and using acceptor determinization above to determinize
    471 // that acceptor.
    472 template <class A, StringType S>
    473 class DeterminizeFstImpl : public DeterminizeFstImplBase<A> {
    474  public:
    475   typedef typename A::Label Label;
    476   typedef typename A::Weight Weight;
    477   typedef typename A::StateId StateId;
    478 
    479   typedef ToGallicMapper<A, S> ToMapper;
    480   typedef FromGallicMapper<A, S> FromMapper;
    481 
    482   typedef typename ToMapper::ToArc ToArc;
    483   typedef MapFst<A, ToArc, ToMapper> ToFst;
    484   typedef MapFst<ToArc, A, FromMapper> FromFst;
    485 
    486   typedef GallicCommonDivisor<Label, Weight, S> CommonDivisor;
    487   typedef GallicFactor<Label, Weight, S> FactorIterator;
    488 
    489   // Defined after DeterminizeFst since it calls it.
    490   DeterminizeFstImpl(const Fst<A> &fst, const DeterminizeFstOptions &opts);
    491 
    492   ~DeterminizeFstImpl() { delete from_fst_; }
    493 
    494   virtual StateId ComputeStart() { return from_fst_->Start(); }
    495 
    496   virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); }
    497 
    498   virtual void Expand(StateId s) {
    499     for (ArcIterator<FromFst> aiter(*from_fst_, s);
    500          !aiter.Done();
    501          aiter.Next())
    502       CacheImpl<A>::AddArc(s, aiter.Value());
    503     CacheImpl<A>::SetArcs(s);
    504   }
    505 
    506  private:
    507   FromFst *from_fst_;
    508 
    509   DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImpl);
    510 };
    511 
    512 
    513 // Determinizes a weighted transducer. This version is a delayed
    514 // Fst. The result will be an equivalent FST that has the property
    515 // that no state has two transitions with the same input label.
    516 // For this algorithm, epsilon transitions are treated as regular
    517 // symbols (cf. RmEpsilon).
    518 //
    519 // The transducer must be functional. The weights must be (weakly)
    520 // left divisible (valid for TropicalWeight and LogWeight).
    521 //
    522 // Complexity:
    523 // - Determinizable: exponential (polynomial in the size of the output)
    524 // - Non-determinizable) does not terminate
    525 //
    526 // The determinizable automata include all unweighted and all acyclic input.
    527 //
    528 // References:
    529 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech
    530 //   Processing". Computational Linguistics, 23:2, 1997.
    531 template <class A>
    532 class DeterminizeFst : public Fst<A> {
    533  public:
    534   friend class ArcIterator< DeterminizeFst<A> >;
    535   friend class CacheStateIterator< DeterminizeFst<A> >;
    536   friend class CacheArcIterator< DeterminizeFst<A> >;
    537   template <class B, StringType S> friend class DeterminizeFstImpl;
    538 
    539   typedef A Arc;
    540   typedef typename A::Weight Weight;
    541   typedef typename A::StateId StateId;
    542   typedef typename A::Label Label;
    543   typedef CacheState<A> State;
    544 
    545   explicit DeterminizeFst(const Fst<A> &fst,
    546                  const DeterminizeFstOptions &opts = DeterminizeFstOptions()) {
    547     if (fst.Properties(kAcceptor, true)) {
    548       // Calls implementation for acceptors.
    549       typedef DefaultCommonDivisor<Weight> D;
    550       impl_ = new DeterminizeFsaImpl<A, D>(fst, D(), opts);
    551     } else {
    552       // Calls implementation for transducers.
    553       impl_ = new DeterminizeFstImpl<A, STRING_LEFT_RESTRICT>(fst, opts);
    554     }
    555   }
    556 
    557   DeterminizeFst(const DeterminizeFst<A> &fst) : Fst<A>(fst), impl_(fst.impl_) {
    558     impl_->IncrRefCount();
    559   }
    560 
    561   virtual ~DeterminizeFst() { if (!impl_->DecrRefCount()) delete impl_; }
    562 
    563   virtual StateId Start() const { return impl_->Start(); }
    564 
    565   virtual Weight Final(StateId s) const { return impl_->Final(s); }
    566 
    567   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
    568 
    569   virtual size_t NumInputEpsilons(StateId s) const {
    570     return impl_->NumInputEpsilons(s);
    571   }
    572 
    573   virtual size_t NumOutputEpsilons(StateId s) const {
    574     return impl_->NumOutputEpsilons(s);
    575   }
    576 
    577   virtual uint64 Properties(uint64 mask, bool test) const {
    578     if (test) {
    579       uint64 known, test = TestProperties(*this, mask, &known);
    580       impl_->SetProperties(test, known);
    581       return test & mask;
    582     } else {
    583       return impl_->Properties(mask);
    584     }
    585   }
    586 
    587   virtual const string& Type() const { return impl_->Type(); }
    588 
    589   virtual DeterminizeFst<A> *Copy() const {
    590     return new DeterminizeFst<A>(*this);
    591   }
    592 
    593   virtual const SymbolTable* InputSymbols() const {
    594     return impl_->InputSymbols();
    595   }
    596 
    597   virtual const SymbolTable* OutputSymbols() const {
    598     return impl_->OutputSymbols();
    599   }
    600 
    601   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
    602 
    603   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
    604     impl_->InitArcIterator(s, data);
    605   }
    606 
    607  protected:
    608   DeterminizeFstImplBase<A> *Impl() { return impl_; }
    609 
    610  private:
    611   // This private version is for passing the common divisor to
    612   // FSA determinization.
    613   template <class D>
    614   DeterminizeFst(const Fst<A> &fst, const D &common_divisor,
    615                  const DeterminizeFstOptions &opts)
    616       :  impl_(new DeterminizeFsaImpl<A, D>(fst, common_divisor, opts)) {}
    617 
    618   DeterminizeFstImplBase<A> *impl_;
    619 
    620   void operator=(const DeterminizeFst<A> &fst);  // Disallow
    621 };
    622 
    623 
    624 template <class A, StringType S>
    625 DeterminizeFstImpl<A, S>::DeterminizeFstImpl(
    626     const Fst<A> &fst, const DeterminizeFstOptions &opts)
    627     : DeterminizeFstImplBase<A>(fst, opts) {
    628 
    629   // Mapper to an acceptor.
    630   ToFst to_fst(fst, ToMapper());
    631 
    632   // Determinize acceptor.
    633   // This recursive call terminates since it passes the common divisor
    634   // to a private constructor.
    635   DeterminizeFst<ToArc> det_fsa(to_fst, CommonDivisor(), opts);
    636 
    637   // Mapper back to transducer.
    638   FactorWeightOptions fopts(CacheOptions(true, 0), opts.delta, true);
    639   FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts);
    640   from_fst_ = new FromFst(factored_fst, FromMapper());
    641 }
    642 
    643 
    644 // Specialization for DeterminizeFst.
    645 template <class A>
    646 class StateIterator< DeterminizeFst<A> >
    647     : public CacheStateIterator< DeterminizeFst<A> > {
    648  public:
    649   explicit StateIterator(const DeterminizeFst<A> &fst)
    650       : CacheStateIterator< DeterminizeFst<A> >(fst) {}
    651 };
    652 
    653 
    654 // Specialization for DeterminizeFst.
    655 template <class A>
    656 class ArcIterator< DeterminizeFst<A> >
    657     : public CacheArcIterator< DeterminizeFst<A> > {
    658  public:
    659   typedef typename A::StateId StateId;
    660 
    661   ArcIterator(const DeterminizeFst<A> &fst, StateId s)
    662       : CacheArcIterator< DeterminizeFst<A> >(fst, s) {
    663     if (!fst.impl_->HasArcs(s))
    664       fst.impl_->Expand(s);
    665   }
    666 
    667  private:
    668   DISALLOW_EVIL_CONSTRUCTORS(ArcIterator);
    669 };
    670 
    671 
    672 template <class A> inline
    673 void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
    674 {
    675   data->base = new StateIterator< DeterminizeFst<A> >(*this);
    676 }
    677 
    678 
    679 // Useful aliases when using StdArc.
    680 typedef DeterminizeFst<StdArc> StdDeterminizeFst;
    681 
    682 
    683 struct DeterminizeOptions {
    684   float delta;                   // Quantization delta for subset weights
    685 
    686   explicit DeterminizeOptions(float d) : delta(d) {}
    687   DeterminizeOptions() :delta(kDelta) {}
    688 };
    689 
    690 
    691 // Determinizes a weighted transducer.  This version writes the
    692 // determinized Fst to an output MutableFst.  The result will be an
    693 // equivalent FSt that has the property that no state has two
    694 // transitions with the same input label.  For this algorithm, epsilon
    695 // transitions are treated as regular symbols (cf. RmEpsilon).
    696 //
    697 // The transducer must be functional. The weights must be (weakly)
    698 // left divisible (valid for TropicalWeight and LogWeight).
    699 //
    700 // Complexity:
    701 // - Determinizable: exponential (polynomial in the size of the output)
    702 // - Non-determinizable: does not terminate
    703 //
    704 // The determinizable automata include all unweighted and all acyclic input.
    705 //
    706 // References:
    707 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech
    708 //   Processing". Computational Linguistics, 23:2, 1997.
    709 template <class Arc>
    710 void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
    711              const DeterminizeOptions &opts = DeterminizeOptions()) {
    712   DeterminizeFstOptions nopts;
    713   nopts.delta = opts.delta;
    714   nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
    715   *ofst = DeterminizeFst<Arc>(ifst, nopts);
    716 }
    717 
    718 
    719 }  // namespace fst
    720 
    721 #endif  // FST_LIB_DETERMINIZE_H__
    722