1 // determinize.h 2 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // 17 // \file 18 // Functions and classes to determinize an FST. 19 20 #ifndef FST_LIB_DETERMINIZE_H__ 21 #define FST_LIB_DETERMINIZE_H__ 22 23 #include <algorithm> 24 #include <map> 25 26 #include <ext/hash_map> 27 using __gnu_cxx::hash_map; 28 #include <ext/slist> 29 using __gnu_cxx::slist; 30 31 #include "fst/lib/cache.h" 32 #include "fst/lib/factor-weight.h" 33 #include "fst/lib/map.h" 34 #include "fst/lib/test-properties.h" 35 36 namespace fst { 37 38 // 39 // COMMON DIVISORS - these are used in determinization to compute 40 // the transition weights. In the simplest case, it is just the same 41 // as the semiring Plus(). However, other choices permit more efficient 42 // determinization when the output contains strings. 43 // 44 45 // The default common divisor uses the semiring Plus. 46 template <class W> 47 class DefaultCommonDivisor { 48 public: 49 typedef W Weight; 50 51 W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); } 52 }; 53 54 55 // The label common divisor for a (left) string semiring selects a 56 // single letter common prefix or the empty string. This is used in 57 // the determinization of output strings so that at most a single 58 // letter will appear in the output of a transtion. 59 template <typename L, StringType S> 60 class LabelCommonDivisor { 61 public: 62 typedef StringWeight<L, S> Weight; 63 64 Weight operator()(const Weight &w1, const Weight &w2) const { 65 StringWeightIterator<L, S> iter1(w1); 66 StringWeightIterator<L, S> iter2(w2); 67 68 if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) 69 LOG(FATAL) << "LabelCommonDivisor: Weight needs to be left semiring"; 70 71 if (w1.Size() == 0 || w2.Size() == 0) 72 return Weight::One(); 73 else if (w1 == Weight::Zero()) 74 return Weight(iter2.Value()); 75 else if (w2 == Weight::Zero()) 76 return Weight(iter1.Value()); 77 else if (iter1.Value() == iter2.Value()) 78 return Weight(iter1.Value()); 79 else 80 return Weight::One(); 81 } 82 }; 83 84 85 // The gallic common divisor uses the label common divisor on the 86 // string component and the template argument D common divisor on the 87 // weight component, which defaults to the default common divisor. 88 template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> > 89 class GallicCommonDivisor { 90 public: 91 typedef GallicWeight<L, W, S> Weight; 92 93 Weight operator()(const Weight &w1, const Weight &w2) const { 94 return Weight(label_common_divisor_(w1.Value1(), w2.Value1()), 95 weight_common_divisor_(w1.Value2(), w2.Value2())); 96 } 97 98 private: 99 LabelCommonDivisor<L, S> label_common_divisor_; 100 D weight_common_divisor_; 101 }; 102 103 // Options for finite-state transducer determinization. 104 struct DeterminizeFstOptions : CacheOptions { 105 float delta; // Quantization delta for subset weights 106 107 explicit DeterminizeFstOptions(const CacheOptions &opts, float del = kDelta) 108 : CacheOptions(opts), delta(del) {} 109 110 explicit DeterminizeFstOptions(float del = kDelta) : delta(del) {} 111 }; 112 113 114 // Implementation of delayed DeterminizeFst. This base class is 115 // common to the variants that implement acceptor and transducer 116 // determinization. 117 template <class A> 118 class DeterminizeFstImplBase : public CacheImpl<A> { 119 public: 120 using FstImpl<A>::SetType; 121 using FstImpl<A>::SetProperties; 122 using FstImpl<A>::Properties; 123 using FstImpl<A>::SetInputSymbols; 124 using FstImpl<A>::SetOutputSymbols; 125 126 using CacheBaseImpl< CacheState<A> >::HasStart; 127 using CacheBaseImpl< CacheState<A> >::HasFinal; 128 using CacheBaseImpl< CacheState<A> >::HasArcs; 129 130 typedef typename A::Label Label; 131 typedef typename A::Weight Weight; 132 typedef typename A::StateId StateId; 133 typedef CacheState<A> State; 134 135 DeterminizeFstImplBase(const Fst<A> &fst, const CacheOptions &opts) 136 : CacheImpl<A>(opts), fst_(fst.Copy()) { 137 SetType("determinize"); 138 uint64 props = fst.Properties(kFstProperties, false); 139 SetProperties(DeterminizeProperties(props), kCopyProperties); 140 141 SetInputSymbols(fst.InputSymbols()); 142 SetOutputSymbols(fst.OutputSymbols()); 143 } 144 145 virtual ~DeterminizeFstImplBase() { delete fst_; } 146 147 StateId Start() { 148 if (!HasStart()) { 149 StateId start = ComputeStart(); 150 if (start != kNoStateId) { 151 this->SetStart(start); 152 } 153 } 154 return CacheImpl<A>::Start(); 155 } 156 157 Weight Final(StateId s) { 158 if (!HasFinal(s)) { 159 Weight final = ComputeFinal(s); 160 this->SetFinal(s, final); 161 } 162 return CacheImpl<A>::Final(s); 163 } 164 165 virtual void Expand(StateId s) = 0; 166 167 size_t NumArcs(StateId s) { 168 if (!HasArcs(s)) 169 Expand(s); 170 return CacheImpl<A>::NumArcs(s); 171 } 172 173 size_t NumInputEpsilons(StateId s) { 174 if (!HasArcs(s)) 175 Expand(s); 176 return CacheImpl<A>::NumInputEpsilons(s); 177 } 178 179 size_t NumOutputEpsilons(StateId s) { 180 if (!HasArcs(s)) 181 Expand(s); 182 return CacheImpl<A>::NumOutputEpsilons(s); 183 } 184 185 void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 186 if (!HasArcs(s)) 187 Expand(s); 188 CacheImpl<A>::InitArcIterator(s, data); 189 } 190 191 virtual StateId ComputeStart() = 0; 192 193 virtual Weight ComputeFinal(StateId s) = 0; 194 195 protected: 196 const Fst<A> *fst_; // Input Fst 197 198 DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImplBase); 199 }; 200 201 202 // Implementation of delayed determinization for weighted acceptors. 203 // It is templated on the arc type A and the common divisor C. 204 template <class A, class C> 205 class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> { 206 public: 207 using DeterminizeFstImplBase<A>::fst_; 208 209 typedef typename A::Label Label; 210 typedef typename A::Weight Weight; 211 typedef typename A::StateId StateId; 212 213 struct Element { 214 Element() {} 215 216 Element(StateId s, Weight w) : state_id(s), weight(w) {} 217 218 StateId state_id; // Input state Id 219 Weight weight; // Residual weight 220 }; 221 typedef slist<Element> Subset; 222 typedef map<Label, Subset*> LabelMap; 223 224 DeterminizeFsaImpl(const Fst<A> &fst, C common_divisor, 225 const DeterminizeFstOptions &opts) 226 : DeterminizeFstImplBase<A>(fst, opts), 227 delta_(opts.delta), common_divisor_(common_divisor), 228 subset_hash_(0, SubsetKey(), SubsetEqual(&elements_)) { 229 if (!fst.Properties(kAcceptor, true)) 230 LOG(FATAL) << "DeterminizeFst: argument not an acceptor"; 231 if (!(Weight::Properties() & kLeftSemiring)) 232 LOG(FATAL) << "DeterminizeFst: Weight needs to be left distributive: " 233 << Weight::Type(); 234 } 235 236 virtual ~DeterminizeFsaImpl() { 237 for (unsigned int i = 0; i < subsets_.size(); ++i) 238 delete subsets_[i]; 239 } 240 241 virtual StateId ComputeStart() { 242 StateId s = fst_->Start(); 243 if (s == kNoStateId) 244 return kNoStateId; 245 Element element(s, Weight::One()); 246 Subset *subset = new Subset; 247 subset->push_front(element); 248 return FindState(subset); 249 } 250 251 virtual Weight ComputeFinal(StateId s) { 252 Subset *subset = subsets_[s]; 253 Weight final = Weight::Zero(); 254 for (typename Subset::iterator siter = subset->begin(); 255 siter != subset->end(); 256 ++siter) { 257 Element &element = *siter; 258 final = Plus(final, Times(element.weight, 259 fst_->Final(element.state_id))); 260 } 261 return final; 262 } 263 264 // Finds the state corresponding to a subset. Only creates a new state 265 // if the subset is not found in the subset hash. FindState takes 266 // ownership of the subset argument (so that it doesn't have to copy it 267 // if it creates a new state). 268 // 269 // The method exploits the following device: all pairs stored in the 270 // associative container subset_hash_ are of the form (subset, 271 // id(subset) + 1), i.e. subset_hash_[subset] > 0 if subset has been 272 // stored previously. For unassigned subsets, the call to 273 // subset_hash_[subset] creates a new pair (subset, 0). As a result, 274 // subset_hash_[subset] == 0 iff subset is new. 275 StateId FindState(Subset *subset) { 276 StateId &assoc_value = subset_hash_[subset]; 277 if (assoc_value == 0) { // subset wasn't present; assign it a new ID 278 subsets_.push_back(subset); 279 assoc_value = subsets_.size(); 280 } else { 281 delete subset; 282 } 283 return assoc_value - 1; // NB: assoc_value = ID + 1 284 } 285 286 // Computes the outgoing transitions from a state, creating new destination 287 // states as needed. 288 virtual void Expand(StateId s) { 289 290 LabelMap label_map; 291 LabelSubsets(s, &label_map); 292 293 for (typename LabelMap::iterator liter = label_map.begin(); 294 liter != label_map.end(); 295 ++liter) 296 AddArc(s, liter->first, liter->second); 297 this->SetArcs(s); 298 } 299 300 private: 301 // Constructs destination subsets per label. At return, subset 302 // element weights include the input automaton label weights and the 303 // subsets may contain duplicate states. 304 void LabelSubsets(StateId s, LabelMap *label_map) { 305 Subset *src_subset = subsets_[s]; 306 307 for (typename Subset::iterator siter = src_subset->begin(); 308 siter != src_subset->end(); 309 ++siter) { 310 Element &src_element = *siter; 311 for (ArcIterator< Fst<A> > aiter(*fst_, src_element.state_id); 312 !aiter.Done(); 313 aiter.Next()) { 314 const A &arc = aiter.Value(); 315 Element dest_element(arc.nextstate, 316 Times(src_element.weight, arc.weight)); 317 Subset* &dest_subset = (*label_map)[arc.ilabel]; 318 if (dest_subset == 0) 319 dest_subset = new Subset; 320 dest_subset->push_front(dest_element); 321 } 322 } 323 } 324 325 // Adds an arc from state S to the destination state associated 326 // with subset DEST_SUBSET (as created by LabelSubsets). 327 void AddArc(StateId s, Label label, Subset *dest_subset) { 328 A arc; 329 arc.ilabel = label; 330 arc.olabel = label; 331 arc.weight = Weight::Zero(); 332 333 typename Subset::iterator oiter; 334 for (typename Subset::iterator diter = dest_subset->begin(); 335 diter != dest_subset->end();) { 336 Element &dest_element = *diter; 337 // Computes label weight. 338 arc.weight = common_divisor_(arc.weight, dest_element.weight); 339 340 while ((StateId)elements_.size() <= dest_element.state_id) 341 elements_.push_back(0); 342 Element *matching_element = elements_[dest_element.state_id]; 343 if (matching_element) { 344 // Found duplicate state: sums state weight and deletes dup. 345 matching_element->weight = Plus(matching_element->weight, 346 dest_element.weight); 347 ++diter; 348 dest_subset->erase_after(oiter); 349 } else { 350 // Saves element so we can check for duplicate for this state. 351 elements_[dest_element.state_id] = &dest_element; 352 oiter = diter; 353 ++diter; 354 } 355 } 356 357 // Divides out label weight from destination subset elements. 358 // Quantizes to ensure comparisons are effective. 359 // Clears element vector. 360 for (typename Subset::iterator diter = dest_subset->begin(); 361 diter != dest_subset->end(); 362 ++diter) { 363 Element &dest_element = *diter; 364 dest_element.weight = Divide(dest_element.weight, arc.weight, 365 DIVIDE_LEFT); 366 dest_element.weight = dest_element.weight.Quantize(delta_); 367 elements_[dest_element.state_id] = 0; 368 } 369 370 arc.nextstate = FindState(dest_subset); 371 CacheImpl<A>::AddArc(s, arc); 372 } 373 374 // Comparison object for hashing Subset(s). Subsets are not sorted in this 375 // implementation, so ordering must not be assumed in the equivalence 376 // test. 377 class SubsetEqual { 378 public: 379 // Constructor takes vector needed to check equality. See immediately 380 // below for constraints on it. 381 explicit SubsetEqual(vector<Element *> *elements) 382 : elements_(elements) {} 383 384 // At each call to operator(), elements_[state] must be defined and 385 // NULL for each state in the subset arguments. When this operator 386 // returns, elements_ will preserve that property. We keep it 387 // full of NULLs so that it is ready for the next call. 388 bool operator()(Subset* subset1, Subset* subset2) const { 389 if (subset1->size() != subset2->size()) 390 return false; 391 392 // Loads first subset elements in element vector. 393 for (typename Subset::iterator iter1 = subset1->begin(); 394 iter1 != subset1->end(); 395 ++iter1) { 396 Element &element1 = *iter1; 397 (*elements_)[element1.state_id] = &element1; 398 } 399 400 // Checks second subset matches first via element vector. 401 for (typename Subset::iterator iter2 = subset2->begin(); 402 iter2 != subset2->end(); 403 ++iter2) { 404 Element &element2 = *iter2; 405 Element *element1 = (*elements_)[element2.state_id]; 406 if (!element1 || element1->weight != element2.weight) { 407 // Mismatch found. Resets element vector before returning false. 408 for (typename Subset::iterator iter1 = subset1->begin(); 409 iter1 != subset1->end(); 410 ++iter1) 411 (*elements_)[iter1->state_id] = 0; 412 return false; 413 } else { 414 (*elements_)[element2.state_id] = 0; // Clears entry 415 } 416 } 417 return true; 418 } 419 private: 420 vector<Element *> *elements_; 421 }; 422 423 // Hash function for Subset to Fst states. Subset elements are not 424 // sorted in this implementation, so the hash must be invariant 425 // under subset reordering. 426 class SubsetKey { 427 public: 428 size_t operator()(const Subset* subset) const { 429 size_t hash = 0; 430 for (typename Subset::const_iterator iter = subset->begin(); 431 iter != subset->end(); 432 ++iter) { 433 const Element &element = *iter; 434 int lshift = element.state_id % kPrime; 435 int rshift = sizeof(size_t) - lshift; 436 hash ^= element.state_id << lshift ^ 437 element.state_id >> rshift ^ 438 element.weight.Hash(); 439 } 440 return hash; 441 } 442 443 private: 444 static const int kPrime = sizeof(size_t) == 8 ? 23 : 13; 445 }; 446 447 float delta_; // Quantization delta for subset weights 448 C common_divisor_; 449 450 // Used to test equivalence of subsets. 451 vector<Element *> elements_; 452 453 // Maps from StateId to Subset. 454 vector<Subset *> subsets_; 455 456 // Hashes from Subset to its StateId in the output automaton. 457 typedef hash_map<Subset *, StateId, SubsetKey, SubsetEqual> 458 SubsetHash; 459 460 // Hashes from Label to Subsets corr. to destination states of current state. 461 SubsetHash subset_hash_; 462 463 DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFsaImpl); 464 }; 465 466 467 // Implementation of delayed determinization for transducers. 468 // Transducer determinization is implemented by mapping the input to 469 // the Gallic semiring as an acceptor whose weights contain the output 470 // strings and using acceptor determinization above to determinize 471 // that acceptor. 472 template <class A, StringType S> 473 class DeterminizeFstImpl : public DeterminizeFstImplBase<A> { 474 public: 475 typedef typename A::Label Label; 476 typedef typename A::Weight Weight; 477 typedef typename A::StateId StateId; 478 479 typedef ToGallicMapper<A, S> ToMapper; 480 typedef FromGallicMapper<A, S> FromMapper; 481 482 typedef typename ToMapper::ToArc ToArc; 483 typedef MapFst<A, ToArc, ToMapper> ToFst; 484 typedef MapFst<ToArc, A, FromMapper> FromFst; 485 486 typedef GallicCommonDivisor<Label, Weight, S> CommonDivisor; 487 typedef GallicFactor<Label, Weight, S> FactorIterator; 488 489 // Defined after DeterminizeFst since it calls it. 490 DeterminizeFstImpl(const Fst<A> &fst, const DeterminizeFstOptions &opts); 491 492 ~DeterminizeFstImpl() { delete from_fst_; } 493 494 virtual StateId ComputeStart() { return from_fst_->Start(); } 495 496 virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); } 497 498 virtual void Expand(StateId s) { 499 for (ArcIterator<FromFst> aiter(*from_fst_, s); 500 !aiter.Done(); 501 aiter.Next()) 502 CacheImpl<A>::AddArc(s, aiter.Value()); 503 CacheImpl<A>::SetArcs(s); 504 } 505 506 private: 507 FromFst *from_fst_; 508 509 DISALLOW_EVIL_CONSTRUCTORS(DeterminizeFstImpl); 510 }; 511 512 513 // Determinizes a weighted transducer. This version is a delayed 514 // Fst. The result will be an equivalent FST that has the property 515 // that no state has two transitions with the same input label. 516 // For this algorithm, epsilon transitions are treated as regular 517 // symbols (cf. RmEpsilon). 518 // 519 // The transducer must be functional. The weights must be (weakly) 520 // left divisible (valid for TropicalWeight and LogWeight). 521 // 522 // Complexity: 523 // - Determinizable: exponential (polynomial in the size of the output) 524 // - Non-determinizable) does not terminate 525 // 526 // The determinizable automata include all unweighted and all acyclic input. 527 // 528 // References: 529 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech 530 // Processing". Computational Linguistics, 23:2, 1997. 531 template <class A> 532 class DeterminizeFst : public Fst<A> { 533 public: 534 friend class ArcIterator< DeterminizeFst<A> >; 535 friend class CacheStateIterator< DeterminizeFst<A> >; 536 friend class CacheArcIterator< DeterminizeFst<A> >; 537 template <class B, StringType S> friend class DeterminizeFstImpl; 538 539 typedef A Arc; 540 typedef typename A::Weight Weight; 541 typedef typename A::StateId StateId; 542 typedef typename A::Label Label; 543 typedef CacheState<A> State; 544 545 explicit DeterminizeFst(const Fst<A> &fst, 546 const DeterminizeFstOptions &opts = DeterminizeFstOptions()) { 547 if (fst.Properties(kAcceptor, true)) { 548 // Calls implementation for acceptors. 549 typedef DefaultCommonDivisor<Weight> D; 550 impl_ = new DeterminizeFsaImpl<A, D>(fst, D(), opts); 551 } else { 552 // Calls implementation for transducers. 553 impl_ = new DeterminizeFstImpl<A, STRING_LEFT_RESTRICT>(fst, opts); 554 } 555 } 556 557 DeterminizeFst(const DeterminizeFst<A> &fst) : Fst<A>(fst), impl_(fst.impl_) { 558 impl_->IncrRefCount(); 559 } 560 561 virtual ~DeterminizeFst() { if (!impl_->DecrRefCount()) delete impl_; } 562 563 virtual StateId Start() const { return impl_->Start(); } 564 565 virtual Weight Final(StateId s) const { return impl_->Final(s); } 566 567 virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } 568 569 virtual size_t NumInputEpsilons(StateId s) const { 570 return impl_->NumInputEpsilons(s); 571 } 572 573 virtual size_t NumOutputEpsilons(StateId s) const { 574 return impl_->NumOutputEpsilons(s); 575 } 576 577 virtual uint64 Properties(uint64 mask, bool test) const { 578 if (test) { 579 uint64 known, test = TestProperties(*this, mask, &known); 580 impl_->SetProperties(test, known); 581 return test & mask; 582 } else { 583 return impl_->Properties(mask); 584 } 585 } 586 587 virtual const string& Type() const { return impl_->Type(); } 588 589 virtual DeterminizeFst<A> *Copy() const { 590 return new DeterminizeFst<A>(*this); 591 } 592 593 virtual const SymbolTable* InputSymbols() const { 594 return impl_->InputSymbols(); 595 } 596 597 virtual const SymbolTable* OutputSymbols() const { 598 return impl_->OutputSymbols(); 599 } 600 601 virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 602 603 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 604 impl_->InitArcIterator(s, data); 605 } 606 607 protected: 608 DeterminizeFstImplBase<A> *Impl() { return impl_; } 609 610 private: 611 // This private version is for passing the common divisor to 612 // FSA determinization. 613 template <class D> 614 DeterminizeFst(const Fst<A> &fst, const D &common_divisor, 615 const DeterminizeFstOptions &opts) 616 : impl_(new DeterminizeFsaImpl<A, D>(fst, common_divisor, opts)) {} 617 618 DeterminizeFstImplBase<A> *impl_; 619 620 void operator=(const DeterminizeFst<A> &fst); // Disallow 621 }; 622 623 624 template <class A, StringType S> 625 DeterminizeFstImpl<A, S>::DeterminizeFstImpl( 626 const Fst<A> &fst, const DeterminizeFstOptions &opts) 627 : DeterminizeFstImplBase<A>(fst, opts) { 628 629 // Mapper to an acceptor. 630 ToFst to_fst(fst, ToMapper()); 631 632 // Determinize acceptor. 633 // This recursive call terminates since it passes the common divisor 634 // to a private constructor. 635 DeterminizeFst<ToArc> det_fsa(to_fst, CommonDivisor(), opts); 636 637 // Mapper back to transducer. 638 FactorWeightOptions fopts(CacheOptions(true, 0), opts.delta, true); 639 FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts); 640 from_fst_ = new FromFst(factored_fst, FromMapper()); 641 } 642 643 644 // Specialization for DeterminizeFst. 645 template <class A> 646 class StateIterator< DeterminizeFst<A> > 647 : public CacheStateIterator< DeterminizeFst<A> > { 648 public: 649 explicit StateIterator(const DeterminizeFst<A> &fst) 650 : CacheStateIterator< DeterminizeFst<A> >(fst) {} 651 }; 652 653 654 // Specialization for DeterminizeFst. 655 template <class A> 656 class ArcIterator< DeterminizeFst<A> > 657 : public CacheArcIterator< DeterminizeFst<A> > { 658 public: 659 typedef typename A::StateId StateId; 660 661 ArcIterator(const DeterminizeFst<A> &fst, StateId s) 662 : CacheArcIterator< DeterminizeFst<A> >(fst, s) { 663 if (!fst.impl_->HasArcs(s)) 664 fst.impl_->Expand(s); 665 } 666 667 private: 668 DISALLOW_EVIL_CONSTRUCTORS(ArcIterator); 669 }; 670 671 672 template <class A> inline 673 void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const 674 { 675 data->base = new StateIterator< DeterminizeFst<A> >(*this); 676 } 677 678 679 // Useful aliases when using StdArc. 680 typedef DeterminizeFst<StdArc> StdDeterminizeFst; 681 682 683 struct DeterminizeOptions { 684 float delta; // Quantization delta for subset weights 685 686 explicit DeterminizeOptions(float d) : delta(d) {} 687 DeterminizeOptions() :delta(kDelta) {} 688 }; 689 690 691 // Determinizes a weighted transducer. This version writes the 692 // determinized Fst to an output MutableFst. The result will be an 693 // equivalent FSt that has the property that no state has two 694 // transitions with the same input label. For this algorithm, epsilon 695 // transitions are treated as regular symbols (cf. RmEpsilon). 696 // 697 // The transducer must be functional. The weights must be (weakly) 698 // left divisible (valid for TropicalWeight and LogWeight). 699 // 700 // Complexity: 701 // - Determinizable: exponential (polynomial in the size of the output) 702 // - Non-determinizable: does not terminate 703 // 704 // The determinizable automata include all unweighted and all acyclic input. 705 // 706 // References: 707 // - Mehryar Mohri, "Finite-State Transducers in Language and Speech 708 // Processing". Computational Linguistics, 23:2, 1997. 709 template <class Arc> 710 void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, 711 const DeterminizeOptions &opts = DeterminizeOptions()) { 712 DeterminizeFstOptions nopts; 713 nopts.delta = opts.delta; 714 nopts.gc_limit = 0; // Cache only the last state for fastest copy. 715 *ofst = DeterminizeFst<Arc>(ifst, nopts); 716 } 717 718 719 } // namespace fst 720 721 #endif // FST_LIB_DETERMINIZE_H__ 722