1 // arc-map.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Class to map over/transform arcs e.g., change semirings or 20 // implement project/invert. Consider using when operation does 21 // not change the number of arcs (except possibly superfinal arcs). 22 23 #ifndef FST_LIB_ARC_MAP_H__ 24 #define FST_LIB_ARC_MAP_H__ 25 26 #include <unordered_map> 27 using std::tr1::unordered_map; 28 using std::tr1::unordered_multimap; 29 #include <string> 30 #include <utility> 31 using std::pair; using std::make_pair; 32 33 #include <fst/cache.h> 34 #include <fst/mutable-fst.h> 35 36 37 namespace fst { 38 39 // This determines how final weights are mapped. 40 enum MapFinalAction { 41 // A final weight is mapped into a final weight. An error 42 // is raised if this is not possible. 43 MAP_NO_SUPERFINAL, 44 45 // A final weight is mapped to an arc to the superfinal state 46 // when the result cannot be represented as a final weight. 47 // The superfinal state will be added only if it is needed. 48 MAP_ALLOW_SUPERFINAL, 49 50 // A final weight is mapped to an arc to the superfinal state 51 // unless the result can be represented as a final weight of weight 52 // Zero(). The superfinal state is always added (if the input is 53 // not the empty Fst). 54 MAP_REQUIRE_SUPERFINAL 55 }; 56 57 // This determines how symbol tables are mapped. 58 enum MapSymbolsAction { 59 // Symbols should be cleared in the result by the map. 60 MAP_CLEAR_SYMBOLS, 61 62 // Symbols should be copied from the input FST by the map. 63 MAP_COPY_SYMBOLS, 64 65 // Symbols should not be modified in the result by the map itself. 66 // (They may set by the mapper). 67 MAP_NOOP_SYMBOLS 68 }; 69 70 // ArcMapper Interface - class determinies how arcs and final weights 71 // are mapped. Useful for implementing operations that do not change 72 // the number of arcs (expect possibly superfinal arcs). 73 // 74 // class ArcMapper { 75 // public: 76 // typedef A FromArc; 77 // typedef B ToArc; 78 // 79 // // Maps an arc type A to arc type B. 80 // B operator()(const A &arc); 81 // // Specifies final action the mapper requires (see above). 82 // // The mapper will be passed final weights as arcs of the 83 // // form A(0, 0, weight, kNoStateId). 84 // MapFinalAction FinalAction() const; 85 // // Specifies input symbol table action the mapper requires (see above). 86 // MapSymbolsAction InputSymbolsAction() const; 87 // // Specifies output symbol table action the mapper requires (see above). 88 // MapSymbolsAction OutputSymbolsAction() const; 89 // // This specifies the known properties of an Fst mapped by this 90 // // mapper. It takes as argument the input Fst's known properties. 91 // uint64 Properties(uint64 props) const; 92 // }; 93 // 94 // The ArcMap functions and classes below will use the FinalAction() 95 // method of the mapper to determine how to treat final weights, 96 // e.g. whether to add a superfinal state. They will use the Properties() 97 // method to set the result Fst properties. 98 // 99 // We include a various map versions below. One dimension of 100 // variation is whether the mapping mutates its input, writes to a 101 // new result Fst, or is an on-the-fly Fst. Another dimension is how 102 // we pass the mapper. We allow passing the mapper by pointer 103 // for cases that we need to change the state of the user's mapper. 104 // This is the case with the encode mapper, which is reused during 105 // decoding. We also include map versions that pass the mapper 106 // by value or const reference when this suffices. 107 108 109 // Maps an arc type A using a mapper function object C, passed 110 // by pointer. This version modifies its Fst input. 111 template<class A, class C> 112 void ArcMap(MutableFst<A> *fst, C* mapper) { 113 typedef typename A::StateId StateId; 114 typedef typename A::Weight Weight; 115 116 if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 117 fst->SetInputSymbols(0); 118 119 if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 120 fst->SetOutputSymbols(0); 121 122 if (fst->Start() == kNoStateId) 123 return; 124 125 uint64 props = fst->Properties(kFstProperties, false); 126 127 MapFinalAction final_action = mapper->FinalAction(); 128 StateId superfinal = kNoStateId; 129 if (final_action == MAP_REQUIRE_SUPERFINAL) { 130 superfinal = fst->AddState(); 131 fst->SetFinal(superfinal, Weight::One()); 132 } 133 134 for (StateId s = 0; s < fst->NumStates(); ++s) { 135 for (MutableArcIterator< MutableFst<A> > aiter(fst, s); 136 !aiter.Done(); aiter.Next()) { 137 const A &arc = aiter.Value(); 138 aiter.SetValue((*mapper)(arc)); 139 } 140 141 switch (final_action) { 142 case MAP_NO_SUPERFINAL: 143 default: { 144 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 145 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 146 FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; 147 fst->SetProperties(kError, kError); 148 } 149 150 fst->SetFinal(s, final_arc.weight); 151 break; 152 } 153 case MAP_ALLOW_SUPERFINAL: { 154 if (s != superfinal) { 155 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 156 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 157 // Add a superfinal state if not already done. 158 if (superfinal == kNoStateId) { 159 superfinal = fst->AddState(); 160 fst->SetFinal(superfinal, Weight::One()); 161 } 162 final_arc.nextstate = superfinal; 163 fst->AddArc(s, final_arc); 164 fst->SetFinal(s, Weight::Zero()); 165 } else { 166 fst->SetFinal(s, final_arc.weight); 167 } 168 break; 169 } 170 } 171 case MAP_REQUIRE_SUPERFINAL: { 172 if (s != superfinal) { 173 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 174 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 175 final_arc.weight != Weight::Zero()) 176 fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel, 177 final_arc.weight, superfinal)); 178 fst->SetFinal(s, Weight::Zero()); 179 } 180 break; 181 } 182 } 183 } 184 fst->SetProperties(mapper->Properties(props), kFstProperties); 185 } 186 187 188 // Maps an arc type A using a mapper function object C, passed 189 // by value. This version modifies its Fst input. 190 template<class A, class C> 191 void ArcMap(MutableFst<A> *fst, C mapper) { 192 ArcMap(fst, &mapper); 193 } 194 195 196 // Maps an arc type A to an arc type B using mapper function 197 // object C, passed by pointer. This version writes the mapped 198 // input Fst to an output MutableFst. 199 template<class A, class B, class C> 200 void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { 201 typedef typename A::StateId StateId; 202 typedef typename A::Weight Weight; 203 204 ofst->DeleteStates(); 205 206 if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) 207 ofst->SetInputSymbols(ifst.InputSymbols()); 208 else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 209 ofst->SetInputSymbols(0); 210 211 if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) 212 ofst->SetOutputSymbols(ifst.OutputSymbols()); 213 else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 214 ofst->SetOutputSymbols(0); 215 216 uint64 iprops = ifst.Properties(kCopyProperties, false); 217 218 if (ifst.Start() == kNoStateId) { 219 if (iprops & kError) ofst->SetProperties(kError, kError); 220 return; 221 } 222 223 MapFinalAction final_action = mapper->FinalAction(); 224 if (ifst.Properties(kExpanded, false)) { 225 ofst->ReserveStates(CountStates(ifst) + 226 final_action == MAP_NO_SUPERFINAL ? 0 : 1); 227 } 228 229 // Add all states. 230 for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) 231 ofst->AddState(); 232 233 StateId superfinal = kNoStateId; 234 if (final_action == MAP_REQUIRE_SUPERFINAL) { 235 superfinal = ofst->AddState(); 236 ofst->SetFinal(superfinal, B::Weight::One()); 237 } 238 for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { 239 StateId s = siter.Value(); 240 if (s == ifst.Start()) 241 ofst->SetStart(s); 242 243 ofst->ReserveArcs(s, ifst.NumArcs(s)); 244 for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next()) 245 ofst->AddArc(s, (*mapper)(aiter.Value())); 246 247 switch (final_action) { 248 case MAP_NO_SUPERFINAL: 249 default: { 250 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 251 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 252 FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; 253 ofst->SetProperties(kError, kError); 254 } 255 ofst->SetFinal(s, final_arc.weight); 256 break; 257 } 258 case MAP_ALLOW_SUPERFINAL: { 259 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 260 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 261 // Add a superfinal state if not already done. 262 if (superfinal == kNoStateId) { 263 superfinal = ofst->AddState(); 264 ofst->SetFinal(superfinal, B::Weight::One()); 265 } 266 final_arc.nextstate = superfinal; 267 ofst->AddArc(s, final_arc); 268 ofst->SetFinal(s, B::Weight::Zero()); 269 } else { 270 ofst->SetFinal(s, final_arc.weight); 271 } 272 break; 273 } 274 case MAP_REQUIRE_SUPERFINAL: { 275 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 276 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 277 final_arc.weight != B::Weight::Zero()) 278 ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel, 279 final_arc.weight, superfinal)); 280 ofst->SetFinal(s, B::Weight::Zero()); 281 break; 282 } 283 } 284 } 285 uint64 oprops = ofst->Properties(kFstProperties, false); 286 ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); 287 } 288 289 // Maps an arc type A to an arc type B using mapper function 290 // object C, passed by value. This version writes the mapped input 291 // Fst to an output MutableFst. 292 template<class A, class B, class C> 293 void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { 294 ArcMap(ifst, ofst, &mapper); 295 } 296 297 298 struct ArcMapFstOptions : public CacheOptions { 299 // ArcMapFst default caching behaviour is to do no caching. Most 300 // mappers are cheap and therefore we save memory by not doing 301 // caching. 302 ArcMapFstOptions() : CacheOptions(true, 0) {} 303 ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {} 304 }; 305 306 307 template <class A, class B, class C> class ArcMapFst; 308 309 // Implementation of delayed ArcMapFst. 310 template <class A, class B, class C> 311 class ArcMapFstImpl : public CacheImpl<B> { 312 public: 313 using FstImpl<B>::SetType; 314 using FstImpl<B>::SetProperties; 315 using FstImpl<B>::SetInputSymbols; 316 using FstImpl<B>::SetOutputSymbols; 317 318 using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates; 319 320 using CacheImpl<B>::PushArc; 321 using CacheImpl<B>::HasArcs; 322 using CacheImpl<B>::HasFinal; 323 using CacheImpl<B>::HasStart; 324 using CacheImpl<B>::SetArcs; 325 using CacheImpl<B>::SetFinal; 326 using CacheImpl<B>::SetStart; 327 328 friend class StateIterator< ArcMapFst<A, B, C> >; 329 330 typedef B Arc; 331 typedef typename B::Weight Weight; 332 typedef typename B::StateId StateId; 333 334 ArcMapFstImpl(const Fst<A> &fst, const C &mapper, 335 const ArcMapFstOptions& opts) 336 : CacheImpl<B>(opts), 337 fst_(fst.Copy()), 338 mapper_(new C(mapper)), 339 own_mapper_(true), 340 superfinal_(kNoStateId), 341 nstates_(0) { 342 Init(); 343 } 344 345 ArcMapFstImpl(const Fst<A> &fst, C *mapper, 346 const ArcMapFstOptions& opts) 347 : CacheImpl<B>(opts), 348 fst_(fst.Copy()), 349 mapper_(mapper), 350 own_mapper_(false), 351 superfinal_(kNoStateId), 352 nstates_(0) { 353 Init(); 354 } 355 356 ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl) 357 : CacheImpl<B>(impl), 358 fst_(impl.fst_->Copy(true)), 359 mapper_(new C(*impl.mapper_)), 360 own_mapper_(true), 361 superfinal_(kNoStateId), 362 nstates_(0) { 363 Init(); 364 } 365 366 ~ArcMapFstImpl() { 367 delete fst_; 368 if (own_mapper_) delete mapper_; 369 } 370 371 StateId Start() { 372 if (!HasStart()) 373 SetStart(FindOState(fst_->Start())); 374 return CacheImpl<B>::Start(); 375 } 376 377 Weight Final(StateId s) { 378 if (!HasFinal(s)) { 379 switch (final_action_) { 380 case MAP_NO_SUPERFINAL: 381 default: { 382 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 383 kNoStateId)); 384 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 385 FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc"; 386 SetProperties(kError, kError); 387 } 388 SetFinal(s, final_arc.weight); 389 break; 390 } 391 case MAP_ALLOW_SUPERFINAL: { 392 if (s == superfinal_) { 393 SetFinal(s, Weight::One()); 394 } else { 395 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 396 kNoStateId)); 397 if (final_arc.ilabel == 0 && final_arc.olabel == 0) 398 SetFinal(s, final_arc.weight); 399 else 400 SetFinal(s, Weight::Zero()); 401 } 402 break; 403 } 404 case MAP_REQUIRE_SUPERFINAL: { 405 SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero()); 406 break; 407 } 408 } 409 } 410 return CacheImpl<B>::Final(s); 411 } 412 413 size_t NumArcs(StateId s) { 414 if (!HasArcs(s)) 415 Expand(s); 416 return CacheImpl<B>::NumArcs(s); 417 } 418 419 size_t NumInputEpsilons(StateId s) { 420 if (!HasArcs(s)) 421 Expand(s); 422 return CacheImpl<B>::NumInputEpsilons(s); 423 } 424 425 size_t NumOutputEpsilons(StateId s) { 426 if (!HasArcs(s)) 427 Expand(s); 428 return CacheImpl<B>::NumOutputEpsilons(s); 429 } 430 431 uint64 Properties() const { return Properties(kFstProperties); } 432 433 // Set error if found; return FST impl properties. 434 uint64 Properties(uint64 mask) const { 435 if ((mask & kError) && (fst_->Properties(kError, false) || 436 (mapper_->Properties(0) & kError))) 437 SetProperties(kError, kError); 438 return FstImpl<Arc>::Properties(mask); 439 } 440 441 void InitArcIterator(StateId s, ArcIteratorData<B> *data) { 442 if (!HasArcs(s)) 443 Expand(s); 444 CacheImpl<B>::InitArcIterator(s, data); 445 } 446 447 void Expand(StateId s) { 448 // Add exiting arcs. 449 if (s == superfinal_) { SetArcs(s); return; } 450 451 for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s)); 452 !aiter.Done(); aiter.Next()) { 453 A aarc(aiter.Value()); 454 aarc.nextstate = FindOState(aarc.nextstate); 455 const B& barc = (*mapper_)(aarc); 456 PushArc(s, barc); 457 } 458 459 // Check for superfinal arcs. 460 if (!HasFinal(s) || Final(s) == Weight::Zero()) 461 switch (final_action_) { 462 case MAP_NO_SUPERFINAL: 463 default: 464 break; 465 case MAP_ALLOW_SUPERFINAL: { 466 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 467 kNoStateId)); 468 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 469 if (superfinal_ == kNoStateId) 470 superfinal_ = nstates_++; 471 final_arc.nextstate = superfinal_; 472 PushArc(s, final_arc); 473 } 474 break; 475 } 476 case MAP_REQUIRE_SUPERFINAL: { 477 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 478 kNoStateId)); 479 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 480 final_arc.weight != B::Weight::Zero()) 481 PushArc(s, B(final_arc.ilabel, final_arc.olabel, 482 final_arc.weight, superfinal_)); 483 break; 484 } 485 } 486 SetArcs(s); 487 } 488 489 private: 490 void Init() { 491 SetType("map"); 492 493 if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS) 494 SetInputSymbols(fst_->InputSymbols()); 495 else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 496 SetInputSymbols(0); 497 498 if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS) 499 SetOutputSymbols(fst_->OutputSymbols()); 500 else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 501 SetOutputSymbols(0); 502 503 if (fst_->Start() == kNoStateId) { 504 final_action_ = MAP_NO_SUPERFINAL; 505 SetProperties(kNullProperties); 506 } else { 507 final_action_ = mapper_->FinalAction(); 508 uint64 props = fst_->Properties(kCopyProperties, false); 509 SetProperties(mapper_->Properties(props)); 510 if (final_action_ == MAP_REQUIRE_SUPERFINAL) 511 superfinal_ = 0; 512 } 513 } 514 515 // Maps from output state to input state. 516 StateId FindIState(StateId s) { 517 if (superfinal_ == kNoStateId || s < superfinal_) 518 return s; 519 else 520 return s - 1; 521 } 522 523 // Maps from input state to output state. 524 StateId FindOState(StateId is) { 525 StateId os; 526 if (superfinal_ == kNoStateId || is < superfinal_) 527 os = is; 528 else 529 os = is + 1; 530 531 if (os >= nstates_) 532 nstates_ = os + 1; 533 534 return os; 535 } 536 537 538 const Fst<A> *fst_; 539 C* mapper_; 540 bool own_mapper_; 541 MapFinalAction final_action_; 542 543 StateId superfinal_; 544 StateId nstates_; 545 546 void operator=(const ArcMapFstImpl<A, B, C> &); // disallow 547 }; 548 549 550 // Maps an arc type A to an arc type B using Mapper function object 551 // C. This version is a delayed Fst. 552 template <class A, class B, class C> 553 class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > { 554 public: 555 friend class ArcIterator< ArcMapFst<A, B, C> >; 556 friend class StateIterator< ArcMapFst<A, B, C> >; 557 558 typedef B Arc; 559 typedef typename B::Weight Weight; 560 typedef typename B::StateId StateId; 561 typedef CacheState<B> State; 562 typedef ArcMapFstImpl<A, B, C> Impl; 563 564 ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts) 565 : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} 566 567 ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts) 568 : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} 569 570 ArcMapFst(const Fst<A> &fst, const C &mapper) 571 : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {} 572 573 ArcMapFst(const Fst<A> &fst, C* mapper) 574 : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {} 575 576 // See Fst<>::Copy() for doc. 577 ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false) 578 : ImplToFst<Impl>(fst, safe) {} 579 580 // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc. 581 virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const { 582 return new ArcMapFst<A, B, C>(*this, safe); 583 } 584 585 virtual inline void InitStateIterator(StateIteratorData<B> *data) const; 586 587 virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { 588 GetImpl()->InitArcIterator(s, data); 589 } 590 591 private: 592 // Makes visible to friends. 593 Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 594 595 void operator=(const ArcMapFst<A, B, C> &fst); // disallow 596 }; 597 598 599 // Specialization for ArcMapFst. 600 template<class A, class B, class C> 601 class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> { 602 public: 603 typedef typename B::StateId StateId; 604 605 explicit StateIterator(const ArcMapFst<A, B, C> &fst) 606 : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0), 607 superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL) 608 { CheckSuperfinal(); } 609 610 bool Done() const { return siter_.Done() && !superfinal_; } 611 612 StateId Value() const { return s_; } 613 614 void Next() { 615 ++s_; 616 if (!siter_.Done()) { 617 siter_.Next(); 618 CheckSuperfinal(); 619 } 620 else if (superfinal_) 621 superfinal_ = false; 622 } 623 624 void Reset() { 625 s_ = 0; 626 siter_.Reset(); 627 superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL; 628 CheckSuperfinal(); 629 } 630 631 private: 632 // This allows base-class virtual access to non-virtual derived- 633 // class members of the same name. It makes the derived class more 634 // efficient to use but unsafe to further derive. 635 bool Done_() const { return Done(); } 636 StateId Value_() const { return Value(); } 637 void Next_() { Next(); } 638 void Reset_() { Reset(); } 639 640 void CheckSuperfinal() { 641 if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_) 642 return; 643 if (!siter_.Done()) { 644 B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_), 645 kNoStateId)); 646 if (final_arc.ilabel != 0 || final_arc.olabel != 0) 647 superfinal_ = true; 648 } 649 } 650 651 const ArcMapFstImpl<A, B, C> *impl_; 652 StateIterator< Fst<A> > siter_; 653 StateId s_; 654 bool superfinal_; // true if there is a superfinal state and not done 655 656 DISALLOW_COPY_AND_ASSIGN(StateIterator); 657 }; 658 659 660 // Specialization for ArcMapFst. 661 template <class A, class B, class C> 662 class ArcIterator< ArcMapFst<A, B, C> > 663 : public CacheArcIterator< ArcMapFst<A, B, C> > { 664 public: 665 typedef typename A::StateId StateId; 666 667 ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s) 668 : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) { 669 if (!fst.GetImpl()->HasArcs(s)) 670 fst.GetImpl()->Expand(s); 671 } 672 673 private: 674 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 675 }; 676 677 template <class A, class B, class C> inline 678 void ArcMapFst<A, B, C>::InitStateIterator(StateIteratorData<B> *data) 679 const { 680 data->base = new StateIterator< ArcMapFst<A, B, C> >(*this); 681 } 682 683 684 // 685 // Utility Mappers 686 // 687 688 // Mapper that returns its input. 689 template <class A> 690 struct IdentityArcMapper { 691 typedef A FromArc; 692 typedef A ToArc; 693 694 A operator()(const A &arc) const { return arc; } 695 696 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 697 698 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 699 700 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 701 702 uint64 Properties(uint64 props) const { return props; } 703 }; 704 705 706 // Mapper that returns its input with final states redirected to 707 // a single super-final state. 708 template <class A> 709 struct SuperFinalMapper { 710 typedef A FromArc; 711 typedef A ToArc; 712 713 A operator()(const A &arc) const { return arc; } 714 715 MapFinalAction FinalAction() const { return MAP_REQUIRE_SUPERFINAL; } 716 717 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 718 719 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 720 721 uint64 Properties(uint64 props) const { 722 return props & kAddSuperFinalProperties; 723 } 724 }; 725 726 727 // Mapper that leaves labels and nextstate unchanged and constructs a new weight 728 // from the underlying value of the arc weight. Requires that there is a 729 // WeightConvert class specialization that converts the weights. 730 template <class A, class B> 731 class WeightConvertMapper { 732 public: 733 typedef A FromArc; 734 typedef B ToArc; 735 typedef typename FromArc::Weight FromWeight; 736 typedef typename ToArc::Weight ToWeight; 737 738 ToArc operator()(const FromArc &arc) const { 739 return ToArc(arc.ilabel, arc.olabel, 740 convert_weight_(arc.weight), arc.nextstate); 741 } 742 743 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 744 745 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 746 747 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 748 749 uint64 Properties(uint64 props) const { return props; } 750 751 private: 752 WeightConvert<FromWeight, ToWeight> convert_weight_; 753 }; 754 755 // Non-precision-changing weight conversions. 756 // Consider using more efficient Cast (fst.h) instead. 757 typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper; 758 typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper; 759 760 // Precision-changing weight conversions. 761 typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper; 762 typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper; 763 typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper; 764 typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper; 765 766 // Mapper from A to GallicArc<A>. 767 template <class A, StringType S = STRING_LEFT> 768 struct ToGallicMapper { 769 typedef A FromArc; 770 typedef GallicArc<A, S> ToArc; 771 772 typedef StringWeight<typename A::Label, S> SW; 773 typedef typename A::Weight AW; 774 typedef typename GallicArc<A, S>::Weight GW; 775 776 ToArc operator()(const A &arc) const { 777 // 'Super-final' arc. 778 if (arc.nextstate == kNoStateId && arc.weight != AW::Zero()) 779 return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId); 780 // 'Super-non-final' arc. 781 else if (arc.nextstate == kNoStateId) 782 return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId); 783 // Epsilon label. 784 else if (arc.olabel == 0) 785 return ToArc(arc.ilabel, arc.ilabel, 786 GW(SW::One(), arc.weight), arc.nextstate); 787 // Regular label. 788 else 789 return ToArc(arc.ilabel, arc.ilabel, 790 GW(SW(arc.olabel), arc.weight), arc.nextstate); 791 } 792 793 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 794 795 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 796 797 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} 798 799 uint64 Properties(uint64 props) const { 800 return ProjectProperties(props, true) & kWeightInvariantProperties; 801 } 802 }; 803 804 805 // Mapper from GallicArc<A> to A. 806 template <class A, StringType S = STRING_LEFT> 807 struct FromGallicMapper { 808 typedef GallicArc<A, S> FromArc; 809 typedef A ToArc; 810 811 typedef typename A::Label Label; 812 typedef StringWeight<Label, S> SW; 813 typedef typename A::Weight AW; 814 typedef typename GallicArc<A, S>::Weight GW; 815 816 FromGallicMapper(Label superfinal_label = 0) 817 : superfinal_label_(superfinal_label), error_(false) {} 818 819 A operator()(const FromArc &arc) const { 820 // 'Super-non-final' arc. 821 if (arc.nextstate == kNoStateId && arc.weight == GW::Zero()) 822 return A(arc.ilabel, 0, AW::Zero(), kNoStateId); 823 824 SW w1 = arc.weight.Value1(); 825 AW w2 = arc.weight.Value2(); 826 StringWeightIterator<Label, S> iter1(w1); 827 828 Label l = w1.Size() == 1 ? iter1.Value() : 0; 829 830 if (l == kStringInfinity || l == kStringBad || 831 arc.ilabel != arc.olabel || w1.Size() > 1) { 832 FSTERROR() << "FromGallicMapper: unrepesentable weight"; 833 error_ = true; 834 } 835 836 if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId) 837 return A(superfinal_label_, l, w2, arc.nextstate); 838 else 839 return A(arc.ilabel, l, w2, arc.nextstate); 840 } 841 842 MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; } 843 844 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 845 846 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} 847 848 uint64 Properties(uint64 inprops) const { 849 uint64 outprops = inprops & kOLabelInvariantProperties & 850 kWeightInvariantProperties & kAddSuperFinalProperties; 851 if (error_) 852 outprops |= kError; 853 return outprops; 854 } 855 856 private: 857 Label superfinal_label_; 858 mutable bool error_; 859 }; 860 861 862 // Mapper from GallicArc<A> to A. 863 template <class A, StringType S = STRING_LEFT> 864 struct GallicToNewSymbolsMapper { 865 typedef GallicArc<A, S> FromArc; 866 typedef A ToArc; 867 868 typedef typename A::StateId StateId; 869 typedef typename A::Label Label; 870 typedef StringWeight<Label, S> SW; 871 typedef typename A::Weight AW; 872 typedef typename GallicArc<A, S>::Weight GW; 873 874 GallicToNewSymbolsMapper(MutableFst<ToArc> *fst) 875 : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()), 876 isymbols_(0), error_(false) { 877 fst_->DeleteStates(); 878 state_ = fst_->AddState(); 879 fst_->SetStart(state_); 880 fst_->SetFinal(state_, AW::One()); 881 if (osymbols_) { 882 string name = osymbols_->Name() + "_from_gallic"; 883 fst_->SetInputSymbols(new SymbolTable(name)); 884 isymbols_ = fst_->MutableInputSymbols(); 885 isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0); 886 } else { 887 fst_->SetInputSymbols(0); 888 } 889 } 890 891 A operator()(const FromArc &arc) { 892 // 'Super-non-final' arc. 893 if (arc.nextstate == kNoStateId && arc.weight == GW::Zero()) 894 return A(arc.ilabel, 0, AW::Zero(), kNoStateId); 895 896 SW w1 = arc.weight.Value1(); 897 AW w2 = arc.weight.Value2(); 898 Label l; 899 900 if (w1.Size() == 0) { 901 l = 0; 902 } else { 903 typename Map::iterator miter = map_.find(w1); 904 if (miter != map_.end()) { 905 l = (*miter).second; 906 } else { 907 l = ++lmax_; 908 map_.insert(pair<const SW, Label>(w1, l)); 909 StringWeightIterator<Label, S> iter1(w1); 910 StateId n; 911 string s; 912 for(size_t i = 0, p = state_; 913 i < w1.Size(); 914 ++i, iter1.Next(), p = n) { 915 n = i == w1.Size() - 1 ? state_ : fst_->AddState(); 916 fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n)); 917 if (isymbols_) { 918 if (i) s = s + "_"; 919 s = s + osymbols_->Find(iter1.Value()); 920 } 921 } 922 if (isymbols_) 923 isymbols_->AddSymbol(s, l); 924 } 925 } 926 927 if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) { 928 FSTERROR() << "GallicToNewSymbolMapper: unrepesentable weight"; 929 error_ = true; 930 } 931 932 return A(arc.ilabel, l, w2, arc.nextstate); 933 } 934 935 MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; } 936 937 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 938 939 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } 940 941 uint64 Properties(uint64 inprops) const { 942 uint64 outprops = inprops & kOLabelInvariantProperties & 943 kWeightInvariantProperties & kAddSuperFinalProperties; 944 if (error_) 945 outprops |= kError; 946 return outprops; 947 } 948 949 private: 950 class StringKey { 951 public: 952 size_t operator()(const SW &x) const { 953 return x.Hash(); 954 } 955 }; 956 957 typedef unordered_map<SW, Label, StringKey> Map; 958 959 MutableFst<ToArc> *fst_; 960 Map map_; 961 Label lmax_; 962 StateId state_; 963 const SymbolTable *osymbols_; 964 SymbolTable *isymbols_; 965 mutable bool error_; 966 967 DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper); 968 }; 969 970 971 // Mapper to add a constant to all weights. 972 template <class A> 973 struct PlusMapper { 974 typedef A FromArc; 975 typedef A ToArc; 976 typedef typename A::Weight Weight; 977 978 explicit PlusMapper(Weight w) : weight_(w) {} 979 980 A operator()(const A &arc) const { 981 if (arc.weight == Weight::Zero()) 982 return arc; 983 Weight w = Plus(arc.weight, weight_); 984 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 985 } 986 987 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 988 989 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 990 991 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 992 993 uint64 Properties(uint64 props) const { 994 return props & kWeightInvariantProperties; 995 } 996 997 private: 998 999 1000 1001 Weight weight_; 1002 }; 1003 1004 1005 // Mapper to (right) multiply a constant to all weights. 1006 template <class A> 1007 struct TimesMapper { 1008 typedef A FromArc; 1009 typedef A ToArc; 1010 typedef typename A::Weight Weight; 1011 1012 explicit TimesMapper(Weight w) : weight_(w) {} 1013 1014 A operator()(const A &arc) const { 1015 if (arc.weight == Weight::Zero()) 1016 return arc; 1017 Weight w = Times(arc.weight, weight_); 1018 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 1019 } 1020 1021 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1022 1023 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1024 1025 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1026 1027 uint64 Properties(uint64 props) const { 1028 return props & kWeightInvariantProperties; 1029 } 1030 1031 private: 1032 Weight weight_; 1033 }; 1034 1035 1036 // Mapper to reciprocate all non-Zero() weights. 1037 template <class A> 1038 struct InvertWeightMapper { 1039 typedef A FromArc; 1040 typedef A ToArc; 1041 typedef typename A::Weight Weight; 1042 1043 A operator()(const A &arc) const { 1044 if (arc.weight == Weight::Zero()) 1045 return arc; 1046 Weight w = Divide(Weight::One(), arc.weight); 1047 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 1048 } 1049 1050 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1051 1052 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1053 1054 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1055 1056 uint64 Properties(uint64 props) const { 1057 return props & kWeightInvariantProperties; 1058 } 1059 }; 1060 1061 1062 // Mapper to map all non-Zero() weights to One(). 1063 template <class A, class B = A> 1064 struct RmWeightMapper { 1065 typedef A FromArc; 1066 typedef B ToArc; 1067 typedef typename FromArc::Weight FromWeight; 1068 typedef typename ToArc::Weight ToWeight; 1069 1070 B operator()(const A &arc) const { 1071 ToWeight w = arc.weight != FromWeight::Zero() ? 1072 ToWeight::One() : ToWeight::Zero(); 1073 return B(arc.ilabel, arc.olabel, w, arc.nextstate); 1074 } 1075 1076 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1077 1078 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1079 1080 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1081 1082 uint64 Properties(uint64 props) const { 1083 return (props & kWeightInvariantProperties) | kUnweighted; 1084 } 1085 }; 1086 1087 1088 // Mapper to quantize all weights. 1089 template <class A, class B = A> 1090 struct QuantizeMapper { 1091 typedef A FromArc; 1092 typedef B ToArc; 1093 typedef typename FromArc::Weight FromWeight; 1094 typedef typename ToArc::Weight ToWeight; 1095 1096 QuantizeMapper() : delta_(kDelta) {} 1097 1098 explicit QuantizeMapper(float d) : delta_(d) {} 1099 1100 B operator()(const A &arc) const { 1101 ToWeight w = arc.weight.Quantize(delta_); 1102 return B(arc.ilabel, arc.olabel, w, arc.nextstate); 1103 } 1104 1105 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1106 1107 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1108 1109 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1110 1111 uint64 Properties(uint64 props) const { 1112 return props & kWeightInvariantProperties; 1113 } 1114 1115 private: 1116 float delta_; 1117 }; 1118 1119 1120 // Mapper from A to B under the assumption: 1121 // B::Weight = A::Weight::ReverseWeight 1122 // B::Label == A::Label 1123 // B::StateId == A::StateId 1124 // The weight is reversed, while the label and nextstate preserved 1125 // in the mapping. 1126 template <class A, class B> 1127 struct ReverseWeightMapper { 1128 typedef A FromArc; 1129 typedef B ToArc; 1130 1131 B operator()(const A &arc) const { 1132 return B(arc.ilabel, arc.olabel, arc.weight.Reverse(), arc.nextstate); 1133 } 1134 1135 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1136 1137 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1138 1139 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1140 1141 uint64 Properties(uint64 props) const { return props; } 1142 }; 1143 1144 } // namespace fst 1145 1146 #endif // FST_LIB_ARC_MAP_H__ 1147