1 // compose.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // Class to compute the composition of two FSTs 20 21 #ifndef FST_LIB_COMPOSE_H__ 22 #define FST_LIB_COMPOSE_H__ 23 24 #include <algorithm> 25 #include <string> 26 #include <vector> 27 using std::vector; 28 29 #include <fst/cache.h> 30 #include <fst/compose-filter.h> 31 #include <fst/lookahead-filter.h> 32 #include <fst/matcher.h> 33 #include <fst/state-table.h> 34 #include <fst/test-properties.h> 35 36 37 namespace fst { 38 39 // Delayed composition options templated on the arc type, the matcher, 40 // the composition filter, and the composition state table. By 41 // default, the matchers, filter, and state table are constructed by 42 // composition. If set below, the user can instead pass in these 43 // objects; in that case, ComposeFst takes their ownership. This 44 // version controls composition implemented between generic Fst<Arc> 45 // types and a shared matcher type M for Fst<Arc>. This should be 46 // adequate for most applications, giving a reasonable tradeoff 47 // between efficiency and code sharing (but see ComposeFstImplOptions). 48 template <class A, 49 class M = Matcher<Fst<A> >, 50 class F = SequenceComposeFilter<M>, 51 class T = GenericComposeStateTable<A, typename F::FilterState> > 52 struct ComposeFstOptions : public CacheOptions { 53 M *matcher1; // FST1 matcher (see matcher.h) 54 M *matcher2; // FST2 matcher 55 F *filter; // Composition filter (see compose-filter.h) 56 T *state_table; // Composition state table (see compose-state-table.h) 57 58 explicit ComposeFstOptions(const CacheOptions &opts, 59 M *mat1 = 0, M *mat2 = 0, 60 F *filt = 0, T *sttable= 0) 61 : CacheOptions(opts), matcher1(mat1), matcher2(mat2), 62 filter(filt), state_table(sttable) {} 63 64 ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {} 65 }; 66 67 68 // Delayed composition options templated on the two matcher types, the 69 // composition filter, and the composition state table. By default, 70 // the matchers, filter, and state table are constructed by 71 // composition. If set below, the user can instead pass in these 72 // objects; in that case, ComposeFst takes their ownership. This 73 // version controls composition implemented using arbitrary matchers 74 // (of the same Arc type but otherwise arbitrary Fst type). The user 75 // must ensure the matchers are compatible. These options permit the 76 // most efficient use, but shares the least code. This is for advanced 77 // use only in the most demanding or specialized applications that can 78 // benefit from it (o.w. prefer ComposeFstOptions). 79 template <class M1, class M2, 80 class F = SequenceComposeFilter<M1, M2>, 81 class T = GenericComposeStateTable<typename M1::Arc, 82 typename F::FilterState> > 83 struct ComposeFstImplOptions : public CacheOptions { 84 M1 *matcher1; // FST1 matcher (see matcher.h) 85 M2 *matcher2; // FST2 matcher 86 F *filter; // Composition filter (see compose-filter.h) 87 T *state_table; // Composition state table (see compose-state-table.h) 88 89 explicit ComposeFstImplOptions(const CacheOptions &opts, 90 M1 *mat1 = 0, M2 *mat2 = 0, 91 F *filt = 0, T *sttable= 0) 92 : CacheOptions(opts), matcher1(mat1), matcher2(mat2), 93 filter(filt), state_table(sttable) {} 94 95 ComposeFstImplOptions() 96 : matcher1(0), matcher2(0), filter(0), state_table(0) {} 97 }; 98 99 100 // Implementation of delayed composition. This base class is 101 // common to the variants with different matchers, composition filters 102 // and state tables. 103 template <class A> 104 class ComposeFstImplBase : public CacheImpl<A> { 105 public: 106 using FstImpl<A>::SetType; 107 using FstImpl<A>::SetProperties; 108 using FstImpl<A>::Properties; 109 using FstImpl<A>::SetInputSymbols; 110 using FstImpl<A>::SetOutputSymbols; 111 112 using CacheBaseImpl< CacheState<A> >::HasStart; 113 using CacheBaseImpl< CacheState<A> >::HasFinal; 114 using CacheBaseImpl< CacheState<A> >::HasArcs; 115 using CacheBaseImpl< CacheState<A> >::SetFinal; 116 using CacheBaseImpl< CacheState<A> >::SetStart; 117 118 typedef typename A::Label Label; 119 typedef typename A::Weight Weight; 120 typedef typename A::StateId StateId; 121 typedef CacheState<A> State; 122 123 ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2, 124 const CacheOptions &opts) 125 : CacheImpl<A>(opts) { 126 VLOG(2) << "ComposeFst(" << this << "): Begin"; 127 SetType("compose"); 128 129 if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) { 130 FSTERROR() << "ComposeFst: output symbol table of 1st argument " 131 << "does not match input symbol table of 2nd argument"; 132 SetProperties(kError, kError); 133 } 134 135 SetInputSymbols(fst1.InputSymbols()); 136 SetOutputSymbols(fst2.OutputSymbols()); 137 } 138 139 ComposeFstImplBase(const ComposeFstImplBase<A> &impl) 140 : CacheImpl<A>(impl, true) { 141 SetProperties(impl.Properties(), kCopyProperties); 142 SetInputSymbols(impl.InputSymbols()); 143 SetOutputSymbols(impl.OutputSymbols()); 144 } 145 146 virtual ComposeFstImplBase<A> *Copy() = 0; 147 148 virtual ~ComposeFstImplBase() {} 149 150 StateId Start() { 151 if (!HasStart()) { 152 StateId start = ComputeStart(); 153 if (start != kNoStateId) { 154 SetStart(start); 155 } 156 } 157 return CacheImpl<A>::Start(); 158 } 159 160 Weight Final(StateId s) { 161 if (!HasFinal(s)) { 162 Weight final = ComputeFinal(s); 163 SetFinal(s, final); 164 } 165 return CacheImpl<A>::Final(s); 166 } 167 168 virtual void Expand(StateId s) = 0; 169 170 size_t NumArcs(StateId s) { 171 if (!HasArcs(s)) 172 Expand(s); 173 return CacheImpl<A>::NumArcs(s); 174 } 175 176 size_t NumInputEpsilons(StateId s) { 177 if (!HasArcs(s)) 178 Expand(s); 179 return CacheImpl<A>::NumInputEpsilons(s); 180 } 181 182 size_t NumOutputEpsilons(StateId s) { 183 if (!HasArcs(s)) 184 Expand(s); 185 return CacheImpl<A>::NumOutputEpsilons(s); 186 } 187 188 void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 189 if (!HasArcs(s)) 190 Expand(s); 191 CacheImpl<A>::InitArcIterator(s, data); 192 } 193 194 protected: 195 virtual StateId ComputeStart() = 0; 196 virtual Weight ComputeFinal(StateId s) = 0; 197 }; 198 199 200 // Implementaion of delayed composition templated on the matchers (see 201 // matcher.h), composition filter (see compose-filter-inl.h) and 202 // the composition state table (see compose-state-table.h). 203 template <class M1, class M2, class F, class T> 204 class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> { 205 typedef typename M1::FST FST1; 206 typedef typename M2::FST FST2; 207 typedef typename M1::Arc Arc; 208 typedef typename Arc::StateId StateId; 209 typedef typename Arc::Label Label; 210 typedef typename Arc::Weight Weight; 211 typedef typename F::FilterState FilterState; 212 typedef typename F::Matcher1 Matcher1; 213 typedef typename F::Matcher2 Matcher2; 214 215 using CacheBaseImpl<CacheState<Arc> >::SetArcs; 216 using FstImpl<Arc>::SetType; 217 using FstImpl<Arc>::SetProperties; 218 219 typedef ComposeStateTuple<StateId, FilterState> StateTuple; 220 221 public: 222 ComposeFstImpl(const FST1 &fst1, const FST2 &fst2, 223 const ComposeFstImplOptions<M1, M2, F, T> &opts); 224 225 ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl) 226 : ComposeFstImplBase<Arc>(impl), 227 filter_(new F(*impl.filter_, true)), 228 matcher1_(filter_->GetMatcher1()), 229 matcher2_(filter_->GetMatcher2()), 230 fst1_(matcher1_->GetFst()), 231 fst2_(matcher2_->GetFst()), 232 state_table_(new T(*impl.state_table_)), 233 match_type_(impl.match_type_) {} 234 235 ~ComposeFstImpl() { 236 VLOG(2) << "ComposeFst(" << this 237 << "): End: # of visited states: " << state_table_->Size(); 238 239 delete filter_; 240 delete state_table_; 241 } 242 243 virtual ComposeFstImpl<M1, M2, F, T> *Copy() { 244 return new ComposeFstImpl<M1, M2, F, T>(*this); 245 } 246 247 uint64 Properties() const { return Properties(kFstProperties); } 248 249 // Set error if found; return FST impl properties. 250 uint64 Properties(uint64 mask) const { 251 if ((mask & kError) && 252 (fst1_.Properties(kError, false) || 253 fst2_.Properties(kError, false) || 254 (matcher1_->Properties(0) & kError) || 255 (matcher2_->Properties(0) & kError) | 256 (filter_->Properties(0) & kError) || 257 state_table_->Error())) { 258 SetProperties(kError, kError); 259 } 260 return FstImpl<Arc>::Properties(mask); 261 } 262 263 // Arranges it so that the first arg to OrderedExpand is the Fst 264 // that will be matched on. 265 void Expand(StateId s) { 266 const StateTuple &tuple = state_table_->Tuple(s); 267 StateId s1 = tuple.state_id1; 268 StateId s2 = tuple.state_id2; 269 filter_->SetState(s1, s2, tuple.filter_state); 270 if (match_type_ == MATCH_OUTPUT || 271 (match_type_ == MATCH_BOTH && 272 internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2))) 273 OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false); 274 else 275 OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true); 276 } 277 278 const FST1 &GetFst1() { return fst1_; } 279 const FST2 &GetFst2() { return fst2_; } 280 M1 *GetMatcher1() { return matcher1_; } 281 M2 *GetMatcher2() { return matcher2_; } 282 F *GetFilter() { return filter_; } 283 T *GetStateTable() { return state_table_; } 284 285 private: 286 // This does that actual matching of labels in the composition. The 287 // arguments are ordered so matching is called on state 'sa' of 288 // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg 289 // determines whether the input or output label of arcs at 'sb' is 290 // the one to match on. 291 template <class FST, class Matcher> 292 void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa, 293 const FST &fstb, StateId sb, 294 Matcher *matchera, bool match_input) { 295 matchera->SetState(sa); 296 297 // First process non-consuming symbols (e.g., epsilons) on FSTA. 298 Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0, 299 Weight::One(), sb); 300 MatchArc(s, matchera, loop, match_input); 301 302 // Then process matches on FSTB. 303 for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next()) 304 MatchArc(s, matchera, iterb.Value(), match_input); 305 306 SetArcs(s); 307 } 308 309 // Matches a single transition from 'fstb' against 'fata' at 's'. 310 template <class Matcher> 311 void MatchArc(StateId s, Matcher *matchera, 312 const Arc &arc, bool match_input) { 313 if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) { 314 for (; !matchera->Done(); matchera->Next()) { 315 Arc arca = matchera->Value(); 316 Arc arcb = arc; 317 if (match_input) { 318 const FilterState &f = filter_->FilterArc(&arcb, &arca); 319 if (f != FilterState::NoState()) 320 AddArc(s, arcb, arca, f); 321 } else { 322 const FilterState &f = filter_->FilterArc(&arca, &arcb); 323 if (f != FilterState::NoState()) 324 AddArc(s, arca, arcb, f); 325 } 326 } 327 } 328 } 329 330 // Add a matching transition at 's'. 331 void AddArc(StateId s, const Arc &arc1, const Arc &arc2, 332 const FilterState &f) { 333 StateTuple tuple(arc1.nextstate, arc2.nextstate, f); 334 Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight), 335 state_table_->FindState(tuple)); 336 CacheImpl<Arc>::PushArc(s, oarc); 337 } 338 339 StateId ComputeStart() { 340 StateId s1 = fst1_.Start(); 341 if (s1 == kNoStateId) 342 return kNoStateId; 343 344 StateId s2 = fst2_.Start(); 345 if (s2 == kNoStateId) 346 return kNoStateId; 347 348 const FilterState &f = filter_->Start(); 349 StateTuple tuple(s1, s2, f); 350 return state_table_->FindState(tuple); 351 } 352 353 Weight ComputeFinal(StateId s) { 354 const StateTuple &tuple = state_table_->Tuple(s); 355 StateId s1 = tuple.state_id1; 356 Weight final1 = internal::Final(fst1_, s1); 357 if (final1 == Weight::Zero()) 358 return final1; 359 360 StateId s2 = tuple.state_id2; 361 Weight final2 = internal::Final(fst2_, s2); 362 if (final2 == Weight::Zero()) 363 return final2; 364 365 filter_->SetState(s1, s2, tuple.filter_state); 366 filter_->FilterFinal(&final1, &final2); 367 return Times(final1, final2); 368 } 369 370 // Identifies and verifies the capabilities of the matcher to be used for 371 // composition. 372 void SetMatchType(); 373 374 F *filter_; 375 Matcher1 *matcher1_; 376 Matcher2 *matcher2_; 377 const FST1 &fst1_; 378 const FST2 &fst2_; 379 T *state_table_; 380 381 MatchType match_type_; 382 383 void operator=(const ComposeFstImpl<M1, M2, F, T> &); // disallow 384 }; 385 386 template <class M1, class M2, class F, class T> inline 387 ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl( 388 const FST1 &fst1, const FST2 &fst2, 389 const ComposeFstImplOptions<M1, M2, F, T> &opts) 390 : ComposeFstImplBase<Arc>(fst1, fst2, opts), 391 filter_(opts.filter ? opts.filter : 392 new F(fst1, fst2, opts.matcher1, opts.matcher2)), 393 matcher1_(filter_->GetMatcher1()), 394 matcher2_(filter_->GetMatcher2()), 395 fst1_(matcher1_->GetFst()), 396 fst2_(matcher2_->GetFst()), 397 state_table_(opts.state_table ? opts.state_table : 398 new T(fst1_, fst2_)) { 399 SetMatchType(); 400 if (match_type_ == MATCH_NONE) 401 SetProperties(kError, kError); 402 VLOG(2) << "ComposeFst(" << this << "): Match type: " 403 << (match_type_ == MATCH_OUTPUT ? "output" : 404 (match_type_ == MATCH_INPUT ? "input" : 405 (match_type_ == MATCH_BOTH ? "both" : 406 (match_type_ == MATCH_NONE ? "none" : "unknown")))); 407 408 uint64 fprops1 = fst1.Properties(kFstProperties, false); 409 uint64 fprops2 = fst2.Properties(kFstProperties, false); 410 uint64 mprops1 = matcher1_->Properties(fprops1); 411 uint64 mprops2 = matcher2_->Properties(fprops2); 412 uint64 cprops = ComposeProperties(mprops1, mprops2); 413 SetProperties(filter_->Properties(cprops), kCopyProperties); 414 if (state_table_->Error()) SetProperties(kError, kError); 415 VLOG(2) << "ComposeFst(" << this << "): Initialized"; 416 } 417 418 template <class M1, class M2, class F, class T> 419 void ComposeFstImpl<M1, M2, F, T>::SetMatchType() { 420 MatchType type1 = matcher1_->Type(false); 421 MatchType type2 = matcher2_->Type(false); 422 uint32 flags1 = matcher1_->Flags(); 423 uint32 flags2 = matcher2_->Flags(); 424 if (flags1 & flags2 & kRequireMatch) { 425 FSTERROR() << "ComposeFst: only one argument can require matching."; 426 match_type_ = MATCH_NONE; 427 } else if (flags1 & kRequireMatch) { 428 if (matcher1_->Type(true) != MATCH_OUTPUT) { 429 FSTERROR() << "ComposeFst: 1st argument requires matching but cannot."; 430 match_type_ = MATCH_NONE; 431 } 432 match_type_ = MATCH_OUTPUT; 433 } else if (flags2 & kRequireMatch) { 434 if (matcher2_->Type(true) != MATCH_INPUT) { 435 FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot."; 436 match_type_ = MATCH_NONE; 437 } 438 match_type_ = MATCH_INPUT; 439 } else if (flags1 & flags2 & kPreferMatch && 440 type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { 441 match_type_ = MATCH_BOTH; 442 } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) { 443 match_type_ = MATCH_OUTPUT; 444 } else if (flags2 & kPreferMatch && type2 == MATCH_INPUT) { 445 match_type_ = MATCH_INPUT; 446 } else if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { 447 match_type_ = MATCH_BOTH; 448 } else if (type1 == MATCH_OUTPUT) { 449 match_type_ = MATCH_OUTPUT; 450 } else if (type2 == MATCH_INPUT) { 451 match_type_ = MATCH_INPUT; 452 } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) { 453 match_type_ = MATCH_OUTPUT; 454 } else if (flags2 & kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) { 455 match_type_ = MATCH_INPUT; 456 } else if (matcher1_->Type(true) == MATCH_OUTPUT) { 457 match_type_ = MATCH_OUTPUT; 458 } else if (matcher2_->Type(true) == MATCH_INPUT) { 459 match_type_ = MATCH_INPUT; 460 } else { 461 FSTERROR() << "ComposeFst: 1st argument cannot match on output labels " 462 << "and 2nd argument cannot match on input labels (sort?)."; 463 match_type_ = MATCH_NONE; 464 } 465 } 466 467 468 // Computes the composition of two transducers. This version is a 469 // delayed Fst. If FST1 transduces string x to y with weight a and FST2 470 // transduces y to z with weight b, then their composition transduces 471 // string x to z with weight Times(x, z). 472 // 473 // The output labels of the first transducer or the input labels of 474 // the second transducer must be sorted (with the default matcher). 475 // The weights need to form a commutative semiring (valid for 476 // TropicalWeight and LogWeight). 477 // 478 // Complexity: 479 // Assuming the first FST is unsorted and the second is sorted: 480 // - Time: O(v1 v2 d1 (log d2 + m2)), 481 // - Space: O(v1 v2) 482 // where vi = # of states visited, di = maximum out-degree, and mi the 483 // maximum multiplicity of the states visited for the ith 484 // FST. Constant time and space to visit an input state or arc is 485 // assumed and exclusive of caching. 486 // 487 // Caveats: 488 // - ComposeFst does not trim its output (since it is a delayed operation). 489 // - The efficiency of composition can be strongly affected by several factors: 490 // - the choice of which tnansducer is sorted - prefer sorting the FST 491 // that has the greater average out-degree. 492 // - the amount of non-determinism 493 // - the presence and location of epsilon transitions - avoid epsilon 494 // transitions on the output side of the first transducer or 495 // the input side of the second transducer or prefer placing 496 // them later in a path since they delay matching and can 497 // introduce non-coaccessible states and transitions. 498 // 499 // This class attaches interface to implementation and handles 500 // reference counting, delegating most methods to ImplToFst. 501 template <class A> 502 class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > { 503 public: 504 friend class ArcIterator< ComposeFst<A> >; 505 friend class StateIterator< ComposeFst<A> >; 506 507 typedef A Arc; 508 typedef typename A::Weight Weight; 509 typedef typename A::StateId StateId; 510 typedef CacheState<A> State; 511 typedef ComposeFstImplBase<A> Impl; 512 513 using ImplToFst<Impl>::SetImpl; 514 515 // Compose specifying only caching options. 516 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, 517 const CacheOptions &opts = CacheOptions()) 518 : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {} 519 520 // Compose specifying one shared matcher type M. Requires input 521 // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for 522 // best code-sharing and matcher compatiblity. 523 template <class M, class F, class T> 524 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, 525 const ComposeFstOptions<A, M, F, T> &opts) 526 : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {} 527 528 // Compose specifying two matcher types M1 and M2. Requires input 529 // Fsts (of the same Arc type but o.w. arbitrary) match the 530 // corresponding matcher FST types (M1::FST, M2::FST). Recommended 531 // only for advanced use in demanding or specialized applications 532 // due to potential code bloat and matcher incompatibilities. 533 template <class M1, class M2, class F, class T> 534 ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2, 535 const ComposeFstImplOptions<M1, M2, F, T> &opts) 536 : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {} 537 538 // See Fst<>::Copy() for doc. 539 ComposeFst(const ComposeFst<A> &fst, bool safe = false) { 540 if (safe) 541 SetImpl(fst.GetImpl()->Copy()); 542 else 543 SetImpl(fst.GetImpl(), false); 544 } 545 546 // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc. 547 virtual ComposeFst<A> *Copy(bool safe = false) const { 548 return new ComposeFst<A>(*this, safe); 549 } 550 551 virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 552 553 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 554 GetImpl()->InitArcIterator(s, data); 555 } 556 557 protected: 558 ComposeFst() {} 559 560 // Create compose implementation specifying two matcher types. 561 template <class M1, class M2, class F, class T> 562 static Impl *CreateBase2( 563 const typename M1::FST &fst1, const typename M2::FST &fst2, 564 const ComposeFstImplOptions<M1, M2, F, T> &opts) { 565 Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts); 566 if (!(Weight::Properties() & kCommutative)) { 567 int64 props1 = fst1.Properties(kUnweighted, true); 568 int64 props2 = fst2.Properties(kUnweighted, true); 569 if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) { 570 FSTERROR() << "ComposeFst: Weights must be a commutative semiring: " 571 << Weight::Type(); 572 impl->SetProperties(kError, kError); 573 } 574 } 575 return impl; 576 } 577 578 // Create compose implementation specifying one matcher type. 579 // Requires input Fsts and matcher FST type (M::FST) be Fst<A> 580 template <class M, class F, class T> 581 static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2, 582 const ComposeFstOptions<A, M, F, T> &opts) { 583 ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2, 584 opts.filter, opts.state_table); 585 return CreateBase2(fst1, fst2, nopts); 586 } 587 588 // Create compose implementation specifying no matcher type. 589 static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2, 590 const CacheOptions &opts) { 591 switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers 592 default: 593 case MATCH_NONE: { // Default composition (no look-ahead) 594 VLOG(2) << "ComposeFst: Default composition (no look-ahead)"; 595 ComposeFstOptions<Arc> nopts(opts); 596 return CreateBase1(fst1, fst2, nopts); 597 } 598 case MATCH_OUTPUT: { // Lookahead on fst1 599 VLOG(2) << "ComposeFst: Lookahead on fst1"; 600 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M; 601 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F; 602 ComposeFstOptions<Arc, M, F> nopts(opts); 603 return CreateBase1(fst1, fst2, nopts); 604 } 605 case MATCH_INPUT: { // Lookahead on fst2 606 VLOG(2) << "ComposeFst: Lookahead on fst2"; 607 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M; 608 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F; 609 ComposeFstOptions<Arc, M, F> nopts(opts); 610 return CreateBase1(fst1, fst2, nopts); 611 } 612 } 613 } 614 615 private: 616 // Makes visible to friends. 617 Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 618 619 void operator=(const ComposeFst<A> &fst); // disallow 620 }; 621 622 623 // Specialization for ComposeFst. 624 template<class A> 625 class StateIterator< ComposeFst<A> > 626 : public CacheStateIterator< ComposeFst<A> > { 627 public: 628 explicit StateIterator(const ComposeFst<A> &fst) 629 : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {} 630 }; 631 632 633 // Specialization for ComposeFst. 634 template <class A> 635 class ArcIterator< ComposeFst<A> > 636 : public CacheArcIterator< ComposeFst<A> > { 637 public: 638 typedef typename A::StateId StateId; 639 640 ArcIterator(const ComposeFst<A> &fst, StateId s) 641 : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) { 642 if (!fst.GetImpl()->HasArcs(s)) 643 fst.GetImpl()->Expand(s); 644 } 645 646 private: 647 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 648 }; 649 650 template <class A> inline 651 void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const { 652 data->base = new StateIterator< ComposeFst<A> >(*this); 653 } 654 655 // Useful alias when using StdArc. 656 typedef ComposeFst<StdArc> StdComposeFst; 657 658 enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER, 659 MATCH_FILTER }; 660 661 struct ComposeOptions { 662 bool connect; // Connect output 663 ComposeFilter filter_type; // Which pre-defined filter to use 664 665 ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER) 666 : connect(c), filter_type(ft) {} 667 ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {} 668 }; 669 670 // Computes the composition of two transducers. This version writes 671 // the composed FST into a MurableFst. If FST1 transduces string x to 672 // y with weight a and FST2 transduces y to z with weight b, then 673 // their composition transduces string x to z with weight 674 // Times(x, z). 675 // 676 // The output labels of the first transducer or the input labels of 677 // the second transducer must be sorted. The weights need to form a 678 // commutative semiring (valid for TropicalWeight and LogWeight). 679 // 680 // Complexity: 681 // Assuming the first FST is unsorted and the second is sorted: 682 // - Time: O(V1 V2 D1 (log D2 + M2)), 683 // - Space: O(V1 V2 D1 M2) 684 // where Vi = # of states, Di = maximum out-degree, and Mi is 685 // the maximum multiplicity for the ith FST. 686 // 687 // Caveats: 688 // - Compose trims its output. 689 // - The efficiency of composition can be strongly affected by several factors: 690 // - the choice of which tnansducer is sorted - prefer sorting the FST 691 // that has the greater average out-degree. 692 // - the amount of non-determinism 693 // - the presence and location of epsilon transitions - avoid epsilon 694 // transitions on the output side of the first transducer or 695 // the input side of the second transducer or prefer placing 696 // them later in a path since they delay matching and can 697 // introduce non-coaccessible states and transitions. 698 template<class Arc> 699 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, 700 MutableFst<Arc> *ofst, 701 const ComposeOptions &opts = ComposeOptions()) { 702 typedef Matcher< Fst<Arc> > M; 703 704 if (opts.filter_type == AUTO_FILTER) { 705 CacheOptions nopts; 706 nopts.gc_limit = 0; // Cache only the last state for fastest copy. 707 *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts); 708 } else if (opts.filter_type == SEQUENCE_FILTER) { 709 ComposeFstOptions<Arc> copts; 710 copts.gc_limit = 0; // Cache only the last state for fastest copy. 711 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 712 } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { 713 ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts; 714 copts.gc_limit = 0; // Cache only the last state for fastest copy. 715 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 716 } else if (opts.filter_type == MATCH_FILTER) { 717 ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts; 718 copts.gc_limit = 0; // Cache only the last state for fastest copy. 719 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 720 } 721 722 if (opts.connect) 723 Connect(ofst); 724 } 725 726 } // namespace fst 727 728 #endif // FST_LIB_COMPOSE_H__ 729