1 // arc-map.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Class to map over/transform arcs e.g., change semirings or 20 // implement project/invert. Consider using when operation does 21 // not change the number of arcs (except possibly superfinal arcs). 22 23 #ifndef FST_LIB_ARC_MAP_H__ 24 #define FST_LIB_ARC_MAP_H__ 25 26 #include <tr1/unordered_map> 27 using std::tr1::unordered_map; 28 using std::tr1::unordered_multimap; 29 #include <string> 30 #include <utility> 31 using std::pair; using std::make_pair; 32 33 #include <fst/cache.h> 34 #include <fst/mutable-fst.h> 35 36 37 namespace fst { 38 39 // This determines how final weights are mapped. 40 enum MapFinalAction { 41 // A final weight is mapped into a final weight. An error 42 // is raised if this is not possible. 43 MAP_NO_SUPERFINAL, 44 45 // A final weight is mapped to an arc to the superfinal state 46 // when the result cannot be represented as a final weight. 47 // The superfinal state will be added only if it is needed. 48 MAP_ALLOW_SUPERFINAL, 49 50 // A final weight is mapped to an arc to the superfinal state 51 // unless the result can be represented as a final weight of weight 52 // Zero(). The superfinal state is always added (if the input is 53 // not the empty Fst). 54 MAP_REQUIRE_SUPERFINAL 55 }; 56 57 // This determines how symbol tables are mapped. 58 enum MapSymbolsAction { 59 // Symbols should be cleared in the result by the map. 60 MAP_CLEAR_SYMBOLS, 61 62 // Symbols should be copied from the input FST by the map. 63 MAP_COPY_SYMBOLS, 64 65 // Symbols should not be modified in the result by the map itself. 66 // (They may set by the mapper). 67 MAP_NOOP_SYMBOLS 68 }; 69 70 // ArcMapper Interface - class determinies how arcs and final weights 71 // are mapped. Useful for implementing operations that do not change 72 // the number of arcs (expect possibly superfinal arcs). 73 // 74 // class ArcMapper { 75 // public: 76 // typedef A FromArc; 77 // typedef B ToArc; 78 // 79 // // Maps an arc type A to arc type B. 80 // B operator()(const A &arc); 81 // // Specifies final action the mapper requires (see above). 82 // // The mapper will be passed final weights as arcs of the 83 // // form A(0, 0, weight, kNoStateId). 84 // MapFinalAction FinalAction() const; 85 // // Specifies input symbol table action the mapper requires (see above). 86 // MapSymbolsAction InputSymbolsAction() const; 87 // // Specifies output symbol table action the mapper requires (see above). 88 // MapSymbolsAction OutputSymbolsAction() const; 89 // // This specifies the known properties of an Fst mapped by this 90 // // mapper. It takes as argument the input Fst's known properties. 91 // uint64 Properties(uint64 props) const; 92 // }; 93 // 94 // The ArcMap functions and classes below will use the FinalAction() 95 // method of the mapper to determine how to treat final weights, 96 // e.g. whether to add a superfinal state. They will use the Properties() 97 // method to set the result Fst properties. 98 // 99 // We include a various map versions below. One dimension of 100 // variation is whether the mapping mutates its input, writes to a 101 // new result Fst, or is an on-the-fly Fst. Another dimension is how 102 // we pass the mapper. We allow passing the mapper by pointer 103 // for cases that we need to change the state of the user's mapper. 104 // This is the case with the encode mapper, which is reused during 105 // decoding. We also include map versions that pass the mapper 106 // by value or const reference when this suffices. 107 108 109 // Maps an arc type A using a mapper function object C, passed 110 // by pointer. This version modifies its Fst input. 111 template<class A, class C> 112 void ArcMap(MutableFst<A> *fst, C* mapper) { 113 typedef typename A::StateId StateId; 114 typedef typename A::Weight Weight; 115 116 if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 117 fst->SetInputSymbols(0); 118 119 if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 120 fst->SetOutputSymbols(0); 121 122 if (fst->Start() == kNoStateId) 123 return; 124 125 uint64 props = fst->Properties(kFstProperties, false); 126 127 MapFinalAction final_action = mapper->FinalAction(); 128 StateId superfinal = kNoStateId; 129 if (final_action == MAP_REQUIRE_SUPERFINAL) { 130 superfinal = fst->AddState(); 131 fst->SetFinal(superfinal, Weight::One()); 132 } 133 134 for (StateId s = 0; s < fst->NumStates(); ++s) { 135 for (MutableArcIterator< MutableFst<A> > aiter(fst, s); 136 !aiter.Done(); aiter.Next()) { 137 const A &arc = aiter.Value(); 138 aiter.SetValue((*mapper)(arc)); 139 } 140 141 switch (final_action) { 142 case MAP_NO_SUPERFINAL: 143 default: { 144 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 145 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 146 FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; 147 fst->SetProperties(kError, kError); 148 } 149 150 fst->SetFinal(s, final_arc.weight); 151 break; 152 } 153 case MAP_ALLOW_SUPERFINAL: { 154 if (s != superfinal) { 155 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 156 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 157 // Add a superfinal state if not already done. 158 if (superfinal == kNoStateId) { 159 superfinal = fst->AddState(); 160 fst->SetFinal(superfinal, Weight::One()); 161 } 162 final_arc.nextstate = superfinal; 163 fst->AddArc(s, final_arc); 164 fst->SetFinal(s, Weight::Zero()); 165 } else { 166 fst->SetFinal(s, final_arc.weight); 167 } 168 } 169 break; 170 } 171 case MAP_REQUIRE_SUPERFINAL: { 172 if (s != superfinal) { 173 A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); 174 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 175 final_arc.weight != Weight::Zero()) 176 fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel, 177 final_arc.weight, superfinal)); 178 fst->SetFinal(s, Weight::Zero()); 179 } 180 break; 181 } 182 } 183 } 184 fst->SetProperties(mapper->Properties(props), kFstProperties); 185 } 186 187 188 // Maps an arc type A using a mapper function object C, passed 189 // by value. This version modifies its Fst input. 190 template<class A, class C> 191 void ArcMap(MutableFst<A> *fst, C mapper) { 192 ArcMap(fst, &mapper); 193 } 194 195 196 // Maps an arc type A to an arc type B using mapper function 197 // object C, passed by pointer. This version writes the mapped 198 // input Fst to an output MutableFst. 199 template<class A, class B, class C> 200 void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { 201 typedef typename A::StateId StateId; 202 typedef typename A::Weight Weight; 203 204 ofst->DeleteStates(); 205 206 if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) 207 ofst->SetInputSymbols(ifst.InputSymbols()); 208 else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 209 ofst->SetInputSymbols(0); 210 211 if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) 212 ofst->SetOutputSymbols(ifst.OutputSymbols()); 213 else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 214 ofst->SetOutputSymbols(0); 215 216 uint64 iprops = ifst.Properties(kCopyProperties, false); 217 218 if (ifst.Start() == kNoStateId) { 219 if (iprops & kError) ofst->SetProperties(kError, kError); 220 return; 221 } 222 223 MapFinalAction final_action = mapper->FinalAction(); 224 if (ifst.Properties(kExpanded, false)) { 225 ofst->ReserveStates(CountStates(ifst) + 226 final_action == MAP_NO_SUPERFINAL ? 0 : 1); 227 } 228 229 // Add all states. 230 for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) 231 ofst->AddState(); 232 233 StateId superfinal = kNoStateId; 234 if (final_action == MAP_REQUIRE_SUPERFINAL) { 235 superfinal = ofst->AddState(); 236 ofst->SetFinal(superfinal, B::Weight::One()); 237 } 238 for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { 239 StateId s = siter.Value(); 240 if (s == ifst.Start()) 241 ofst->SetStart(s); 242 243 ofst->ReserveArcs(s, ifst.NumArcs(s)); 244 for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next()) 245 ofst->AddArc(s, (*mapper)(aiter.Value())); 246 247 switch (final_action) { 248 case MAP_NO_SUPERFINAL: 249 default: { 250 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 251 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 252 FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; 253 ofst->SetProperties(kError, kError); 254 } 255 ofst->SetFinal(s, final_arc.weight); 256 break; 257 } 258 case MAP_ALLOW_SUPERFINAL: { 259 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 260 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 261 // Add a superfinal state if not already done. 262 if (superfinal == kNoStateId) { 263 superfinal = ofst->AddState(); 264 ofst->SetFinal(superfinal, B::Weight::One()); 265 } 266 final_arc.nextstate = superfinal; 267 ofst->AddArc(s, final_arc); 268 ofst->SetFinal(s, B::Weight::Zero()); 269 } else { 270 ofst->SetFinal(s, final_arc.weight); 271 } 272 break; 273 } 274 case MAP_REQUIRE_SUPERFINAL: { 275 B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); 276 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 277 final_arc.weight != B::Weight::Zero()) 278 ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel, 279 final_arc.weight, superfinal)); 280 ofst->SetFinal(s, B::Weight::Zero()); 281 break; 282 } 283 } 284 } 285 uint64 oprops = ofst->Properties(kFstProperties, false); 286 ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); 287 } 288 289 // Maps an arc type A to an arc type B using mapper function 290 // object C, passed by value. This version writes the mapped input 291 // Fst to an output MutableFst. 292 template<class A, class B, class C> 293 void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { 294 ArcMap(ifst, ofst, &mapper); 295 } 296 297 298 struct ArcMapFstOptions : public CacheOptions { 299 // ArcMapFst default caching behaviour is to do no caching. Most 300 // mappers are cheap and therefore we save memory by not doing 301 // caching. 302 ArcMapFstOptions() : CacheOptions(true, 0) {} 303 ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {} 304 }; 305 306 307 template <class A, class B, class C> class ArcMapFst; 308 309 // Implementation of delayed ArcMapFst. 310 template <class A, class B, class C> 311 class ArcMapFstImpl : public CacheImpl<B> { 312 public: 313 using FstImpl<B>::SetType; 314 using FstImpl<B>::SetProperties; 315 using FstImpl<B>::SetInputSymbols; 316 using FstImpl<B>::SetOutputSymbols; 317 318 using CacheImpl<B>::PushArc; 319 using CacheImpl<B>::HasArcs; 320 using CacheImpl<B>::HasFinal; 321 using CacheImpl<B>::HasStart; 322 using CacheImpl<B>::SetArcs; 323 using CacheImpl<B>::SetFinal; 324 using CacheImpl<B>::SetStart; 325 326 friend class StateIterator< ArcMapFst<A, B, C> >; 327 328 typedef B Arc; 329 typedef typename B::Weight Weight; 330 typedef typename B::StateId StateId; 331 332 ArcMapFstImpl(const Fst<A> &fst, const C &mapper, 333 const ArcMapFstOptions& opts) 334 : CacheImpl<B>(opts), 335 fst_(fst.Copy()), 336 mapper_(new C(mapper)), 337 own_mapper_(true), 338 superfinal_(kNoStateId), 339 nstates_(0) { 340 Init(); 341 } 342 343 ArcMapFstImpl(const Fst<A> &fst, C *mapper, 344 const ArcMapFstOptions& opts) 345 : CacheImpl<B>(opts), 346 fst_(fst.Copy()), 347 mapper_(mapper), 348 own_mapper_(false), 349 superfinal_(kNoStateId), 350 nstates_(0) { 351 Init(); 352 } 353 354 ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl) 355 : CacheImpl<B>(impl), 356 fst_(impl.fst_->Copy(true)), 357 mapper_(new C(*impl.mapper_)), 358 own_mapper_(true), 359 superfinal_(kNoStateId), 360 nstates_(0) { 361 Init(); 362 } 363 364 ~ArcMapFstImpl() { 365 delete fst_; 366 if (own_mapper_) delete mapper_; 367 } 368 369 StateId Start() { 370 if (!HasStart()) 371 SetStart(FindOState(fst_->Start())); 372 return CacheImpl<B>::Start(); 373 } 374 375 Weight Final(StateId s) { 376 if (!HasFinal(s)) { 377 switch (final_action_) { 378 case MAP_NO_SUPERFINAL: 379 default: { 380 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 381 kNoStateId)); 382 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 383 FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc"; 384 SetProperties(kError, kError); 385 } 386 SetFinal(s, final_arc.weight); 387 break; 388 } 389 case MAP_ALLOW_SUPERFINAL: { 390 if (s == superfinal_) { 391 SetFinal(s, Weight::One()); 392 } else { 393 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 394 kNoStateId)); 395 if (final_arc.ilabel == 0 && final_arc.olabel == 0) 396 SetFinal(s, final_arc.weight); 397 else 398 SetFinal(s, Weight::Zero()); 399 } 400 break; 401 } 402 case MAP_REQUIRE_SUPERFINAL: { 403 SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero()); 404 break; 405 } 406 } 407 } 408 return CacheImpl<B>::Final(s); 409 } 410 411 size_t NumArcs(StateId s) { 412 if (!HasArcs(s)) 413 Expand(s); 414 return CacheImpl<B>::NumArcs(s); 415 } 416 417 size_t NumInputEpsilons(StateId s) { 418 if (!HasArcs(s)) 419 Expand(s); 420 return CacheImpl<B>::NumInputEpsilons(s); 421 } 422 423 size_t NumOutputEpsilons(StateId s) { 424 if (!HasArcs(s)) 425 Expand(s); 426 return CacheImpl<B>::NumOutputEpsilons(s); 427 } 428 429 uint64 Properties() const { return Properties(kFstProperties); } 430 431 // Set error if found; return FST impl properties. 432 uint64 Properties(uint64 mask) const { 433 if ((mask & kError) && (fst_->Properties(kError, false) || 434 (mapper_->Properties(0) & kError))) 435 SetProperties(kError, kError); 436 return FstImpl<Arc>::Properties(mask); 437 } 438 439 void InitArcIterator(StateId s, ArcIteratorData<B> *data) { 440 if (!HasArcs(s)) 441 Expand(s); 442 CacheImpl<B>::InitArcIterator(s, data); 443 } 444 445 void Expand(StateId s) { 446 // Add exiting arcs. 447 if (s == superfinal_) { SetArcs(s); return; } 448 449 for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s)); 450 !aiter.Done(); aiter.Next()) { 451 A aarc(aiter.Value()); 452 aarc.nextstate = FindOState(aarc.nextstate); 453 const B& barc = (*mapper_)(aarc); 454 PushArc(s, barc); 455 } 456 457 // Check for superfinal arcs. 458 if (!HasFinal(s) || Final(s) == Weight::Zero()) 459 switch (final_action_) { 460 case MAP_NO_SUPERFINAL: 461 default: 462 break; 463 case MAP_ALLOW_SUPERFINAL: { 464 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 465 kNoStateId)); 466 if (final_arc.ilabel != 0 || final_arc.olabel != 0) { 467 if (superfinal_ == kNoStateId) 468 superfinal_ = nstates_++; 469 final_arc.nextstate = superfinal_; 470 PushArc(s, final_arc); 471 } 472 break; 473 } 474 case MAP_REQUIRE_SUPERFINAL: { 475 B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)), 476 kNoStateId)); 477 if (final_arc.ilabel != 0 || final_arc.olabel != 0 || 478 final_arc.weight != B::Weight::Zero()) 479 PushArc(s, B(final_arc.ilabel, final_arc.olabel, 480 final_arc.weight, superfinal_)); 481 break; 482 } 483 } 484 SetArcs(s); 485 } 486 487 private: 488 void Init() { 489 SetType("map"); 490 491 if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS) 492 SetInputSymbols(fst_->InputSymbols()); 493 else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) 494 SetInputSymbols(0); 495 496 if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS) 497 SetOutputSymbols(fst_->OutputSymbols()); 498 else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) 499 SetOutputSymbols(0); 500 501 if (fst_->Start() == kNoStateId) { 502 final_action_ = MAP_NO_SUPERFINAL; 503 SetProperties(kNullProperties); 504 } else { 505 final_action_ = mapper_->FinalAction(); 506 uint64 props = fst_->Properties(kCopyProperties, false); 507 SetProperties(mapper_->Properties(props)); 508 if (final_action_ == MAP_REQUIRE_SUPERFINAL) 509 superfinal_ = 0; 510 } 511 } 512 513 // Maps from output state to input state. 514 StateId FindIState(StateId s) { 515 if (superfinal_ == kNoStateId || s < superfinal_) 516 return s; 517 else 518 return s - 1; 519 } 520 521 // Maps from input state to output state. 522 StateId FindOState(StateId is) { 523 StateId os; 524 if (superfinal_ == kNoStateId || is < superfinal_) 525 os = is; 526 else 527 os = is + 1; 528 529 if (os >= nstates_) 530 nstates_ = os + 1; 531 532 return os; 533 } 534 535 536 const Fst<A> *fst_; 537 C* mapper_; 538 bool own_mapper_; 539 MapFinalAction final_action_; 540 541 StateId superfinal_; 542 StateId nstates_; 543 544 void operator=(const ArcMapFstImpl<A, B, C> &); // disallow 545 }; 546 547 548 // Maps an arc type A to an arc type B using Mapper function object 549 // C. This version is a delayed Fst. 550 template <class A, class B, class C> 551 class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > { 552 public: 553 friend class ArcIterator< ArcMapFst<A, B, C> >; 554 friend class StateIterator< ArcMapFst<A, B, C> >; 555 556 typedef B Arc; 557 typedef typename B::Weight Weight; 558 typedef typename B::StateId StateId; 559 typedef CacheState<B> State; 560 typedef ArcMapFstImpl<A, B, C> Impl; 561 562 ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts) 563 : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} 564 565 ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts) 566 : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} 567 568 ArcMapFst(const Fst<A> &fst, const C &mapper) 569 : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {} 570 571 ArcMapFst(const Fst<A> &fst, C* mapper) 572 : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {} 573 574 // See Fst<>::Copy() for doc. 575 ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false) 576 : ImplToFst<Impl>(fst, safe) {} 577 578 // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc. 579 virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const { 580 return new ArcMapFst<A, B, C>(*this, safe); 581 } 582 583 virtual inline void InitStateIterator(StateIteratorData<B> *data) const; 584 585 virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { 586 GetImpl()->InitArcIterator(s, data); 587 } 588 589 private: 590 // Makes visible to friends. 591 Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 592 593 void operator=(const ArcMapFst<A, B, C> &fst); // disallow 594 }; 595 596 597 // Specialization for ArcMapFst. 598 template<class A, class B, class C> 599 class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> { 600 public: 601 typedef typename B::StateId StateId; 602 603 explicit StateIterator(const ArcMapFst<A, B, C> &fst) 604 : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0), 605 superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL) 606 { CheckSuperfinal(); } 607 608 bool Done() const { return siter_.Done() && !superfinal_; } 609 610 StateId Value() const { return s_; } 611 612 void Next() { 613 ++s_; 614 if (!siter_.Done()) { 615 siter_.Next(); 616 CheckSuperfinal(); 617 } 618 else if (superfinal_) 619 superfinal_ = false; 620 } 621 622 void Reset() { 623 s_ = 0; 624 siter_.Reset(); 625 superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL; 626 CheckSuperfinal(); 627 } 628 629 private: 630 // This allows base-class virtual access to non-virtual derived- 631 // class members of the same name. It makes the derived class more 632 // efficient to use but unsafe to further derive. 633 bool Done_() const { return Done(); } 634 StateId Value_() const { return Value(); } 635 void Next_() { Next(); } 636 void Reset_() { Reset(); } 637 638 void CheckSuperfinal() { 639 if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_) 640 return; 641 if (!siter_.Done()) { 642 B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_), 643 kNoStateId)); 644 if (final_arc.ilabel != 0 || final_arc.olabel != 0) 645 superfinal_ = true; 646 } 647 } 648 649 const ArcMapFstImpl<A, B, C> *impl_; 650 StateIterator< Fst<A> > siter_; 651 StateId s_; 652 bool superfinal_; // true if there is a superfinal state and not done 653 654 DISALLOW_COPY_AND_ASSIGN(StateIterator); 655 }; 656 657 658 // Specialization for ArcMapFst. 659 template <class A, class B, class C> 660 class ArcIterator< ArcMapFst<A, B, C> > 661 : public CacheArcIterator< ArcMapFst<A, B, C> > { 662 public: 663 typedef typename A::StateId StateId; 664 665 ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s) 666 : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) { 667 if (!fst.GetImpl()->HasArcs(s)) 668 fst.GetImpl()->Expand(s); 669 } 670 671 private: 672 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 673 }; 674 675 template <class A, class B, class C> inline 676 void ArcMapFst<A, B, C>::InitStateIterator(StateIteratorData<B> *data) 677 const { 678 data->base = new StateIterator< ArcMapFst<A, B, C> >(*this); 679 } 680 681 682 // 683 // Utility Mappers 684 // 685 686 // Mapper that returns its input. 687 template <class A> 688 struct IdentityArcMapper { 689 typedef A FromArc; 690 typedef A ToArc; 691 692 A operator()(const A &arc) const { return arc; } 693 694 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 695 696 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 697 698 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 699 700 uint64 Properties(uint64 props) const { return props; } 701 }; 702 703 704 // Mapper that returns its input with final states redirected to 705 // a single super-final state. 706 template <class A> 707 struct SuperFinalMapper { 708 typedef A FromArc; 709 typedef A ToArc; 710 711 A operator()(const A &arc) const { return arc; } 712 713 MapFinalAction FinalAction() const { return MAP_REQUIRE_SUPERFINAL; } 714 715 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 716 717 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 718 719 uint64 Properties(uint64 props) const { 720 return props & kAddSuperFinalProperties; 721 } 722 }; 723 724 725 // Mapper that leaves labels and nextstate unchanged and constructs a new weight 726 // from the underlying value of the arc weight. Requires that there is a 727 // WeightConvert class specialization that converts the weights. 728 template <class A, class B> 729 class WeightConvertMapper { 730 public: 731 typedef A FromArc; 732 typedef B ToArc; 733 typedef typename FromArc::Weight FromWeight; 734 typedef typename ToArc::Weight ToWeight; 735 736 ToArc operator()(const FromArc &arc) const { 737 return ToArc(arc.ilabel, arc.olabel, 738 convert_weight_(arc.weight), arc.nextstate); 739 } 740 741 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 742 743 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 744 745 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 746 747 uint64 Properties(uint64 props) const { return props; } 748 749 private: 750 WeightConvert<FromWeight, ToWeight> convert_weight_; 751 }; 752 753 // Non-precision-changing weight conversions. 754 // Consider using more efficient Cast (fst.h) instead. 755 typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper; 756 typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper; 757 758 // Precision-changing weight conversions. 759 typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper; 760 typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper; 761 typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper; 762 typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper; 763 764 // Mapper from A to GallicArc<A>. 765 template <class A, StringType S = STRING_LEFT> 766 struct ToGallicMapper { 767 typedef A FromArc; 768 typedef GallicArc<A, S> ToArc; 769 770 typedef StringWeight<typename A::Label, S> SW; 771 typedef typename A::Weight AW; 772 typedef typename GallicArc<A, S>::Weight GW; 773 774 ToArc operator()(const A &arc) const { 775 // 'Super-final' arc. 776 if (arc.nextstate == kNoStateId && arc.weight != AW::Zero()) 777 return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId); 778 // 'Super-non-final' arc. 779 else if (arc.nextstate == kNoStateId) 780 return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId); 781 // Epsilon label. 782 else if (arc.olabel == 0) 783 return ToArc(arc.ilabel, arc.ilabel, 784 GW(SW::One(), arc.weight), arc.nextstate); 785 // Regular label. 786 else 787 return ToArc(arc.ilabel, arc.ilabel, 788 GW(SW(arc.olabel), arc.weight), arc.nextstate); 789 } 790 791 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 792 793 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 794 795 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} 796 797 uint64 Properties(uint64 props) const { 798 return ProjectProperties(props, true) & kWeightInvariantProperties; 799 } 800 }; 801 802 803 // Mapper from GallicArc<A> to A. 804 template <class A, StringType S = STRING_LEFT> 805 struct FromGallicMapper { 806 typedef GallicArc<A, S> FromArc; 807 typedef A ToArc; 808 809 typedef typename A::Label Label; 810 typedef StringWeight<Label, S> SW; 811 typedef typename A::Weight AW; 812 typedef typename GallicArc<A, S>::Weight GW; 813 814 FromGallicMapper(Label superfinal_label = 0) 815 : superfinal_label_(superfinal_label), error_(false) {} 816 817 A operator()(const FromArc &arc) const { 818 // 'Super-non-final' arc. 819 if (arc.nextstate == kNoStateId && arc.weight == GW::Zero()) 820 return A(arc.ilabel, 0, AW::Zero(), kNoStateId); 821 822 SW w1 = arc.weight.Value1(); 823 AW w2 = arc.weight.Value2(); 824 StringWeightIterator<Label, S> iter1(w1); 825 826 Label l = w1.Size() == 1 ? iter1.Value() : 0; 827 828 if (l == kStringInfinity || l == kStringBad || 829 arc.ilabel != arc.olabel || w1.Size() > 1) { 830 FSTERROR() << "FromGallicMapper: unrepesentable weight"; 831 error_ = true; 832 } 833 834 if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId) 835 return A(superfinal_label_, l, w2, arc.nextstate); 836 else 837 return A(arc.ilabel, l, w2, arc.nextstate); 838 } 839 840 MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; } 841 842 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 843 844 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} 845 846 uint64 Properties(uint64 inprops) const { 847 uint64 outprops = inprops & kOLabelInvariantProperties & 848 kWeightInvariantProperties & kAddSuperFinalProperties; 849 if (error_) 850 outprops |= kError; 851 return outprops; 852 } 853 854 private: 855 Label superfinal_label_; 856 mutable bool error_; 857 }; 858 859 860 // Mapper from GallicArc<A> to A. 861 template <class A, StringType S = STRING_LEFT> 862 struct GallicToNewSymbolsMapper { 863 typedef GallicArc<A, S> FromArc; 864 typedef A ToArc; 865 866 typedef typename A::StateId StateId; 867 typedef typename A::Label Label; 868 typedef StringWeight<Label, S> SW; 869 typedef typename A::Weight AW; 870 typedef typename GallicArc<A, S>::Weight GW; 871 872 GallicToNewSymbolsMapper(MutableFst<ToArc> *fst) 873 : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()), 874 isymbols_(0), error_(false) { 875 fst_->DeleteStates(); 876 state_ = fst_->AddState(); 877 fst_->SetStart(state_); 878 fst_->SetFinal(state_, AW::One()); 879 if (osymbols_) { 880 string name = osymbols_->Name() + "_from_gallic"; 881 fst_->SetInputSymbols(new SymbolTable(name)); 882 isymbols_ = fst_->MutableInputSymbols(); 883 isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0); 884 } else { 885 fst_->SetInputSymbols(0); 886 } 887 } 888 889 A operator()(const FromArc &arc) { 890 // 'Super-non-final' arc. 891 if (arc.nextstate == kNoStateId && arc.weight == GW::Zero()) 892 return A(arc.ilabel, 0, AW::Zero(), kNoStateId); 893 894 SW w1 = arc.weight.Value1(); 895 AW w2 = arc.weight.Value2(); 896 Label l; 897 898 if (w1.Size() == 0) { 899 l = 0; 900 } else { 901 typename Map::iterator miter = map_.find(w1); 902 if (miter != map_.end()) { 903 l = (*miter).second; 904 } else { 905 l = ++lmax_; 906 map_.insert(pair<const SW, Label>(w1, l)); 907 StringWeightIterator<Label, S> iter1(w1); 908 StateId n; 909 string s; 910 for(size_t i = 0, p = state_; 911 i < w1.Size(); 912 ++i, iter1.Next(), p = n) { 913 n = i == w1.Size() - 1 ? state_ : fst_->AddState(); 914 fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n)); 915 if (isymbols_) { 916 if (i) s = s + "_"; 917 s = s + osymbols_->Find(iter1.Value()); 918 } 919 } 920 if (isymbols_) 921 isymbols_->AddSymbol(s, l); 922 } 923 } 924 925 if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) { 926 FSTERROR() << "GallicToNewSymbolMapper: unrepesentable weight"; 927 error_ = true; 928 } 929 930 return A(arc.ilabel, l, w2, arc.nextstate); 931 } 932 933 MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; } 934 935 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 936 937 MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } 938 939 uint64 Properties(uint64 inprops) const { 940 uint64 outprops = inprops & kOLabelInvariantProperties & 941 kWeightInvariantProperties & kAddSuperFinalProperties; 942 if (error_) 943 outprops |= kError; 944 return outprops; 945 } 946 947 private: 948 class StringKey { 949 public: 950 size_t operator()(const SW &x) const { 951 return x.Hash(); 952 } 953 }; 954 955 typedef unordered_map<SW, Label, StringKey> Map; 956 957 MutableFst<ToArc> *fst_; 958 Map map_; 959 Label lmax_; 960 StateId state_; 961 const SymbolTable *osymbols_; 962 SymbolTable *isymbols_; 963 mutable bool error_; 964 965 DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper); 966 }; 967 968 969 // Mapper to add a constant to all weights. 970 template <class A> 971 struct PlusMapper { 972 typedef A FromArc; 973 typedef A ToArc; 974 typedef typename A::Weight Weight; 975 976 explicit PlusMapper(Weight w) : weight_(w) {} 977 978 A operator()(const A &arc) const { 979 if (arc.weight == Weight::Zero()) 980 return arc; 981 Weight w = Plus(arc.weight, weight_); 982 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 983 } 984 985 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 986 987 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 988 989 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 990 991 uint64 Properties(uint64 props) const { 992 return props & kWeightInvariantProperties; 993 } 994 995 private: 996 997 998 999 Weight weight_; 1000 }; 1001 1002 1003 // Mapper to (right) multiply a constant to all weights. 1004 template <class A> 1005 struct TimesMapper { 1006 typedef A FromArc; 1007 typedef A ToArc; 1008 typedef typename A::Weight Weight; 1009 1010 explicit TimesMapper(Weight w) : weight_(w) {} 1011 1012 A operator()(const A &arc) const { 1013 if (arc.weight == Weight::Zero()) 1014 return arc; 1015 Weight w = Times(arc.weight, weight_); 1016 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 1017 } 1018 1019 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1020 1021 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1022 1023 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1024 1025 uint64 Properties(uint64 props) const { 1026 return props & kWeightInvariantProperties; 1027 } 1028 1029 private: 1030 Weight weight_; 1031 }; 1032 1033 1034 // Mapper to reciprocate all non-Zero() weights. 1035 template <class A> 1036 struct InvertWeightMapper { 1037 typedef A FromArc; 1038 typedef A ToArc; 1039 typedef typename A::Weight Weight; 1040 1041 A operator()(const A &arc) const { 1042 if (arc.weight == Weight::Zero()) 1043 return arc; 1044 Weight w = Divide(Weight::One(), arc.weight); 1045 return A(arc.ilabel, arc.olabel, w, arc.nextstate); 1046 } 1047 1048 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1049 1050 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1051 1052 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1053 1054 uint64 Properties(uint64 props) const { 1055 return props & kWeightInvariantProperties; 1056 } 1057 }; 1058 1059 1060 // Mapper to map all non-Zero() weights to One(). 1061 template <class A, class B = A> 1062 struct RmWeightMapper { 1063 typedef A FromArc; 1064 typedef B ToArc; 1065 typedef typename FromArc::Weight FromWeight; 1066 typedef typename ToArc::Weight ToWeight; 1067 1068 B operator()(const A &arc) const { 1069 ToWeight w = arc.weight != FromWeight::Zero() ? 1070 ToWeight::One() : ToWeight::Zero(); 1071 return B(arc.ilabel, arc.olabel, w, arc.nextstate); 1072 } 1073 1074 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1075 1076 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1077 1078 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1079 1080 uint64 Properties(uint64 props) const { 1081 return (props & kWeightInvariantProperties) | kUnweighted; 1082 } 1083 }; 1084 1085 1086 // Mapper to quantize all weights. 1087 template <class A, class B = A> 1088 struct QuantizeMapper { 1089 typedef A FromArc; 1090 typedef B ToArc; 1091 typedef typename FromArc::Weight FromWeight; 1092 typedef typename ToArc::Weight ToWeight; 1093 1094 QuantizeMapper() : delta_(kDelta) {} 1095 1096 explicit QuantizeMapper(float d) : delta_(d) {} 1097 1098 B operator()(const A &arc) const { 1099 ToWeight w = arc.weight.Quantize(delta_); 1100 return B(arc.ilabel, arc.olabel, w, arc.nextstate); 1101 } 1102 1103 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1104 1105 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1106 1107 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1108 1109 uint64 Properties(uint64 props) const { 1110 return props & kWeightInvariantProperties; 1111 } 1112 1113 private: 1114 float delta_; 1115 }; 1116 1117 1118 // Mapper from A to B under the assumption: 1119 // B::Weight = A::Weight::ReverseWeight 1120 // B::Label == A::Label 1121 // B::StateId == A::StateId 1122 // The weight is reversed, while the label and nextstate preserved 1123 // in the mapping. 1124 template <class A, class B> 1125 struct ReverseWeightMapper { 1126 typedef A FromArc; 1127 typedef B ToArc; 1128 1129 B operator()(const A &arc) const { 1130 return B(arc.ilabel, arc.olabel, arc.weight.Reverse(), arc.nextstate); 1131 } 1132 1133 MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } 1134 1135 MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } 1136 1137 MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} 1138 1139 uint64 Properties(uint64 props) const { return props; } 1140 }; 1141 1142 } // namespace fst 1143 1144 #endif // FST_LIB_ARC_MAP_H__ 1145