Home | History | Annotate | Download | only in fst
      1 // matcher.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes to allow matching labels leaving FST states.
     20 
     21 #ifndef FST_LIB_MATCHER_H__
     22 #define FST_LIB_MATCHER_H__
     23 
     24 #include <algorithm>
     25 #include <set>
     26 
     27 #include <fst/mutable-fst.h>  // for all internal FST accessors
     28 
     29 
     30 namespace fst {
     31 
     32 // MATCHERS - these can find and iterate through requested labels at
     33 // FST states. In the simplest form, these are just some associative
     34 // map or search keyed on labels. More generally, they may
     35 // implement matching special labels that represent sets of labels
     36 // such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
     37 // The Matcher interface is:
     38 //
     39 // template <class F>
     40 // class Matcher {
     41 //  public:
     42 //   typedef F FST;
     43 //   typedef F::Arc Arc;
     44 //   typedef typename Arc::StateId StateId;
     45 //   typedef typename Arc::Label Label;
     46 //   typedef typename Arc::Weight Weight;
     47 //
     48 //   // Required constructors.
     49 //   Matcher(const F &fst, MatchType type);
     50 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
     51 //   // for further doc.
     52 //   Matcher(const Matcher &matcher, bool safe = false);
     53 //
     54 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
     55 //   // for further doc.
     56 //   Matcher<F> *Copy(bool safe = false) const;
     57 //
     58 //   // Returns the match type that can be provided (depending on
     59 //   // compatibility of the input FST). It is either
     60 //   // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
     61 //   // If 'test' is false, a constant time test is performed, but
     62 //   // MATCH_UNKNOWN may be returned. If 'test' is true,
     63 //   // a definite answer is returned, but may involve more costly
     64 //   // computation (e.g., visiting the Fst).
     65 //   MatchType Type(bool test) const;
     66 //   // Specifies the current state.
     67 //   void SetState(StateId s);
     68 //
     69 //   // This finds matches to a label at the current state.
     70 //   // Returns true if a match found. kNoLabel matches any
     71 //   // 'non-consuming' transitions, e.g., epsilon transitions,
     72 //   // which do not require a matching symbol.
     73 //   bool Find(Label label);
     74 //   // These iterate through any matches found:
     75 //   bool Done() const;         // No more matches.
     76 //   const A& Value() const;    // Current arc (when !Done)
     77 //   void Next();               // Advance to next arc (when !Done)
     78 //   // Initially and after SetState() the iterator methods
     79 //   // have undefined behavior until Find() is called.
     80 //
     81 //   // Return matcher FST.
     82 //   const F& GetFst() const;
     83 //   // This specifies the known Fst properties as viewed from this
     84 //   // matcher. It takes as argument the input Fst's known properties.
     85 //   uint64 Properties(uint64 props) const;
     86 // };
     87 
     88 //
     89 // MATCHER FLAGS (see also kLookAheadFlags in lookahead-matcher.h)
     90 //
     91 // Matcher prefers being used as the matching side in composition.
     92 const uint32 kPreferMatch  = 0x00000001;
     93 
     94 // Matcher needs to be used as the matching side in composition.
     95 const uint32 kRequireMatch = 0x00000002;
     96 
     97 // Flags used for basic matchers (see also lookahead.h).
     98 const uint32 kMatcherFlags = kPreferMatch | kRequireMatch;
     99 
    100 // Matcher interface, templated on the Arc definition; used
    101 // for matcher specializations that are returned by the
    102 // InitMatcher Fst method.
    103 template <class A>
    104 class MatcherBase {
    105  public:
    106   typedef A Arc;
    107   typedef typename A::StateId StateId;
    108   typedef typename A::Label Label;
    109   typedef typename A::Weight Weight;
    110 
    111   virtual ~MatcherBase() {}
    112 
    113   virtual MatcherBase<A> *Copy(bool safe = false) const = 0;
    114   virtual MatchType Type(bool test) const = 0;
    115   void SetState(StateId s) { SetState_(s); }
    116   bool Find(Label label) { return Find_(label); }
    117   bool Done() const { return Done_(); }
    118   const A& Value() const { return Value_(); }
    119   void Next() { Next_(); }
    120   virtual const Fst<A> &GetFst() const = 0;
    121   virtual uint64 Properties(uint64 props) const = 0;
    122   virtual uint32 Flags() const { return 0; }
    123  private:
    124   virtual void SetState_(StateId s) = 0;
    125   virtual bool Find_(Label label) = 0;
    126   virtual bool Done_() const = 0;
    127   virtual const A& Value_() const  = 0;
    128   virtual void Next_()  = 0;
    129 };
    130 
    131 
    132 // A matcher that expects sorted labels on the side to be matched.
    133 // If match_type == MATCH_INPUT, epsilons match the implicit self loop
    134 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
    135 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
    136 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
    137 template <class F>
    138 class SortedMatcher : public MatcherBase<typename F::Arc> {
    139  public:
    140   typedef F FST;
    141   typedef typename F::Arc Arc;
    142   typedef typename Arc::StateId StateId;
    143   typedef typename Arc::Label Label;
    144   typedef typename Arc::Weight Weight;
    145 
    146   // Labels >= binary_label will be searched for by binary search,
    147   // o.w. linear search is used.
    148   SortedMatcher(const F &fst, MatchType match_type,
    149                 Label binary_label = 1)
    150       : fst_(fst.Copy()),
    151         s_(kNoStateId),
    152         aiter_(0),
    153         match_type_(match_type),
    154         binary_label_(binary_label),
    155         match_label_(kNoLabel),
    156         narcs_(0),
    157         loop_(kNoLabel, 0, Weight::One(), kNoStateId),
    158         error_(false) {
    159     switch(match_type_) {
    160       case MATCH_INPUT:
    161       case MATCH_NONE:
    162         break;
    163       case MATCH_OUTPUT:
    164         swap(loop_.ilabel, loop_.olabel);
    165         break;
    166       default:
    167         FSTERROR() << "SortedMatcher: bad match type";
    168         match_type_ = MATCH_NONE;
    169         error_ = true;
    170     }
    171   }
    172 
    173   SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false)
    174       : fst_(matcher.fst_->Copy(safe)),
    175         s_(kNoStateId),
    176         aiter_(0),
    177         match_type_(matcher.match_type_),
    178         binary_label_(matcher.binary_label_),
    179         match_label_(kNoLabel),
    180         narcs_(0),
    181         loop_(matcher.loop_),
    182         error_(matcher.error_) {}
    183 
    184   virtual ~SortedMatcher() {
    185     if (aiter_)
    186       delete aiter_;
    187     delete fst_;
    188   }
    189 
    190   virtual SortedMatcher<F> *Copy(bool safe = false) const {
    191     return new SortedMatcher<F>(*this, safe);
    192   }
    193 
    194   virtual MatchType Type(bool test) const {
    195     if (match_type_ == MATCH_NONE)
    196       return match_type_;
    197 
    198     uint64 true_prop =  match_type_ == MATCH_INPUT ?
    199         kILabelSorted : kOLabelSorted;
    200     uint64 false_prop = match_type_ == MATCH_INPUT ?
    201         kNotILabelSorted : kNotOLabelSorted;
    202     uint64 props = fst_->Properties(true_prop | false_prop, test);
    203 
    204     if (props & true_prop)
    205       return match_type_;
    206     else if (props & false_prop)
    207       return MATCH_NONE;
    208     else
    209       return MATCH_UNKNOWN;
    210   }
    211 
    212   void SetState(StateId s) {
    213     if (s_ == s)
    214       return;
    215     s_ = s;
    216     if (match_type_ == MATCH_NONE) {
    217       FSTERROR() << "SortedMatcher: bad match type";
    218       error_ = true;
    219     }
    220     if (aiter_)
    221       delete aiter_;
    222     aiter_ = new ArcIterator<F>(*fst_, s);
    223     aiter_->SetFlags(kArcNoCache, kArcNoCache);
    224     narcs_ = internal::NumArcs(*fst_, s);
    225     loop_.nextstate = s;
    226   }
    227 
    228   bool Find(Label match_label) {
    229     exact_match_ = true;
    230     if (error_) {
    231       current_loop_ = false;
    232       match_label_ = kNoLabel;
    233       return false;
    234     }
    235     current_loop_ = match_label == 0;
    236     match_label_ = match_label == kNoLabel ? 0 : match_label;
    237     if (Search()) {
    238       return true;
    239     } else {
    240       return current_loop_;
    241     }
    242   }
    243 
    244   // Positions matcher to the first position where inserting
    245   // match_label would maintain the sort order.
    246   void LowerBound(Label match_label) {
    247     exact_match_ = false;
    248     current_loop_ = false;
    249     if (error_) {
    250       match_label_ = kNoLabel;
    251       return;
    252     }
    253     match_label_ = match_label;
    254     Search();
    255   }
    256 
    257   // After Find(), returns false if no more exact matches.
    258   // After LowerBound(), returns false if no more arcs.
    259   bool Done() const {
    260     if (current_loop_)
    261       return false;
    262     if (aiter_->Done())
    263       return true;
    264     if (!exact_match_)
    265       return false;
    266     aiter_->SetFlags(
    267       match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
    268       kArcValueFlags);
    269     Label label = match_type_ == MATCH_INPUT ?
    270         aiter_->Value().ilabel : aiter_->Value().olabel;
    271     return label != match_label_;
    272   }
    273 
    274   const Arc& Value() const {
    275     if (current_loop_) {
    276       return loop_;
    277     }
    278     aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
    279     return aiter_->Value();
    280   }
    281 
    282   void Next() {
    283     if (current_loop_)
    284       current_loop_ = false;
    285     else
    286       aiter_->Next();
    287   }
    288 
    289   virtual const F &GetFst() const { return *fst_; }
    290 
    291   virtual uint64 Properties(uint64 inprops) const {
    292     uint64 outprops = inprops;
    293     if (error_) outprops |= kError;
    294     return outprops;
    295   }
    296 
    297   size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
    298 
    299  private:
    300   virtual void SetState_(StateId s) { SetState(s); }
    301   virtual bool Find_(Label label) { return Find(label); }
    302   virtual bool Done_() const { return Done(); }
    303   virtual const Arc& Value_() const { return Value(); }
    304   virtual void Next_() { Next(); }
    305 
    306   bool Search();
    307 
    308   const F *fst_;
    309   StateId s_;                     // Current state
    310   ArcIterator<F> *aiter_;         // Iterator for current state
    311   MatchType match_type_;          // Type of match to perform
    312   Label binary_label_;            // Least label for binary search
    313   Label match_label_;             // Current label to be matched
    314   size_t narcs_;                  // Current state arc count
    315   Arc loop_;                      // For non-consuming symbols
    316   bool current_loop_;             // Current arc is the implicit loop
    317   bool exact_match_;              // Exact match or lower bound?
    318   bool error_;                    // Error encountered
    319 
    320   void operator=(const SortedMatcher<F> &);  // Disallow
    321 };
    322 
    323 // Returns true iff match to match_label_. Positions arc iterator at
    324 // lower bound regardless.
    325 template <class F> inline
    326 bool SortedMatcher<F>::Search() {
    327   aiter_->SetFlags(
    328       match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
    329       kArcValueFlags);
    330   if (match_label_ >= binary_label_) {
    331     // Binary search for match.
    332     size_t low = 0;
    333     size_t high = narcs_;
    334     while (low < high) {
    335       size_t mid = (low + high) / 2;
    336       aiter_->Seek(mid);
    337       Label label = match_type_ == MATCH_INPUT ?
    338           aiter_->Value().ilabel : aiter_->Value().olabel;
    339       if (label > match_label_) {
    340         high = mid;
    341       } else if (label < match_label_) {
    342         low = mid + 1;
    343       } else {
    344         // find first matching label (when non-determinism)
    345         for (size_t i = mid; i > low; --i) {
    346           aiter_->Seek(i - 1);
    347           label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel :
    348               aiter_->Value().olabel;
    349           if (label != match_label_) {
    350             aiter_->Seek(i);
    351             return true;
    352           }
    353         }
    354         return true;
    355       }
    356     }
    357     aiter_->Seek(low);
    358     return false;
    359   } else {
    360     // Linear search for match.
    361     for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
    362       Label label = match_type_ == MATCH_INPUT ?
    363           aiter_->Value().ilabel : aiter_->Value().olabel;
    364       if (label == match_label_) {
    365         return true;
    366       }
    367       if (label > match_label_)
    368         break;
    369     }
    370     return false;
    371   }
    372 }
    373 
    374 
    375 // Specifies whether during matching we rewrite both the input and output sides.
    376 enum MatcherRewriteMode {
    377   MATCHER_REWRITE_AUTO = 0,    // Rewrites both sides iff acceptor.
    378   MATCHER_REWRITE_ALWAYS,
    379   MATCHER_REWRITE_NEVER
    380 };
    381 
    382 
    383 // For any requested label that doesn't match at a state, this matcher
    384 // considers all transitions that match the label 'rho_label' (rho =
    385 // 'rest').  Each such rho transition found is returned with the
    386 // rho_label rewritten as the requested label (both sides if an
    387 // acceptor, or if 'rewrite_both' is true and both input and output
    388 // labels of the found transition are 'rho_label').  If 'rho_label' is
    389 // kNoLabel, this special matching is not done.  RhoMatcher is
    390 // templated itself on a matcher, which is used to perform the
    391 // underlying matching.  By default, the underlying matcher is
    392 // constructed by RhoMatcher.  The user can instead pass in this
    393 // object; in that case, RhoMatcher takes its ownership.
    394 template <class M>
    395 class RhoMatcher : public MatcherBase<typename M::Arc> {
    396  public:
    397   typedef typename M::FST FST;
    398   typedef typename M::Arc Arc;
    399   typedef typename Arc::StateId StateId;
    400   typedef typename Arc::Label Label;
    401   typedef typename Arc::Weight Weight;
    402 
    403   RhoMatcher(const FST &fst,
    404              MatchType match_type,
    405              Label rho_label = kNoLabel,
    406              MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    407              M *matcher = 0)
    408       : matcher_(matcher ? matcher : new M(fst, match_type)),
    409         match_type_(match_type),
    410         rho_label_(rho_label),
    411         error_(false) {
    412     if (match_type == MATCH_BOTH) {
    413       FSTERROR() << "RhoMatcher: bad match type";
    414       match_type_ = MATCH_NONE;
    415       error_ = true;
    416     }
    417     if (rho_label == 0) {
    418       FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
    419       rho_label_ = kNoLabel;
    420       error_ = true;
    421     }
    422 
    423     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    424       rewrite_both_ = fst.Properties(kAcceptor, true);
    425     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    426       rewrite_both_ = true;
    427     else
    428       rewrite_both_ = false;
    429   }
    430 
    431   RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
    432       : matcher_(new M(*matcher.matcher_, safe)),
    433         match_type_(matcher.match_type_),
    434         rho_label_(matcher.rho_label_),
    435         rewrite_both_(matcher.rewrite_both_),
    436         error_(matcher.error_) {}
    437 
    438   virtual ~RhoMatcher() {
    439     delete matcher_;
    440   }
    441 
    442   virtual RhoMatcher<M> *Copy(bool safe = false) const {
    443     return new RhoMatcher<M>(*this, safe);
    444   }
    445 
    446   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    447 
    448   void SetState(StateId s) {
    449     matcher_->SetState(s);
    450     has_rho_ = rho_label_ != kNoLabel;
    451   }
    452 
    453   bool Find(Label match_label) {
    454     if (match_label == rho_label_ && rho_label_ != kNoLabel) {
    455       FSTERROR() << "RhoMatcher::Find: bad label (rho)";
    456       error_ = true;
    457       return false;
    458     }
    459     if (matcher_->Find(match_label)) {
    460       rho_match_ = kNoLabel;
    461       return true;
    462     } else if (has_rho_ && match_label != 0 && match_label != kNoLabel &&
    463                (has_rho_ = matcher_->Find(rho_label_))) {
    464       rho_match_ = match_label;
    465       return true;
    466     } else {
    467       return false;
    468     }
    469   }
    470 
    471   bool Done() const { return matcher_->Done(); }
    472 
    473   const Arc& Value() const {
    474     if (rho_match_ == kNoLabel) {
    475       return matcher_->Value();
    476     } else {
    477       rho_arc_ = matcher_->Value();
    478       if (rewrite_both_) {
    479         if (rho_arc_.ilabel == rho_label_)
    480           rho_arc_.ilabel = rho_match_;
    481         if (rho_arc_.olabel == rho_label_)
    482           rho_arc_.olabel = rho_match_;
    483       } else if (match_type_ == MATCH_INPUT) {
    484         rho_arc_.ilabel = rho_match_;
    485       } else {
    486         rho_arc_.olabel = rho_match_;
    487       }
    488       return rho_arc_;
    489     }
    490   }
    491 
    492   void Next() { matcher_->Next(); }
    493 
    494   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    495 
    496   virtual uint64 Properties(uint64 props) const;
    497 
    498   virtual uint32 Flags() const {
    499     if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE)
    500       return matcher_->Flags();
    501     return matcher_->Flags() | kRequireMatch;
    502   }
    503 
    504  private:
    505   virtual void SetState_(StateId s) { SetState(s); }
    506   virtual bool Find_(Label label) { return Find(label); }
    507   virtual bool Done_() const { return Done(); }
    508   virtual const Arc& Value_() const { return Value(); }
    509   virtual void Next_() { Next(); }
    510 
    511   M *matcher_;
    512   MatchType match_type_;  // Type of match requested
    513   Label rho_label_;       // Label that represents the rho transition
    514   bool rewrite_both_;     // Rewrite both sides when both are 'rho_label_'
    515   bool has_rho_;          // Are there possibly rhos at the current state?
    516   Label rho_match_;       // Current label that matches rho transition
    517   mutable Arc rho_arc_;   // Arc to return when rho match
    518   bool error_;            // Error encountered
    519 
    520   void operator=(const RhoMatcher<M> &);  // Disallow
    521 };
    522 
    523 template <class M> inline
    524 uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
    525   uint64 outprops = matcher_->Properties(inprops);
    526   if (error_) outprops |= kError;
    527 
    528   if (match_type_ == MATCH_NONE) {
    529     return outprops;
    530   } else if (match_type_ == MATCH_INPUT) {
    531     if (rewrite_both_) {
    532       return outprops & ~(kODeterministic | kNonODeterministic | kString |
    533                        kILabelSorted | kNotILabelSorted |
    534                        kOLabelSorted | kNotOLabelSorted);
    535     } else {
    536       return outprops & ~(kODeterministic | kAcceptor | kString |
    537                        kILabelSorted | kNotILabelSorted);
    538     }
    539   } else if (match_type_ == MATCH_OUTPUT) {
    540     if (rewrite_both_) {
    541       return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
    542                        kILabelSorted | kNotILabelSorted |
    543                        kOLabelSorted | kNotOLabelSorted);
    544     } else {
    545       return outprops & ~(kIDeterministic | kAcceptor | kString |
    546                        kOLabelSorted | kNotOLabelSorted);
    547     }
    548   } else {
    549     // Shouldn't ever get here.
    550     FSTERROR() << "RhoMatcher:: bad match type: " << match_type_;
    551     return 0;
    552   }
    553 }
    554 
    555 
    556 // For any requested label, this matcher considers all transitions
    557 // that match the label 'sigma_label' (sigma = "any"), and this in
    558 // additions to transitions with the requested label.  Each such sigma
    559 // transition found is returned with the sigma_label rewritten as the
    560 // requested label (both sides if an acceptor, or if 'rewrite_both' is
    561 // true and both input and output labels of the found transition are
    562 // 'sigma_label').  If 'sigma_label' is kNoLabel, this special
    563 // matching is not done.  SigmaMatcher is templated itself on a
    564 // matcher, which is used to perform the underlying matching.  By
    565 // default, the underlying matcher is constructed by SigmaMatcher.
    566 // The user can instead pass in this object; in that case,
    567 // SigmaMatcher takes its ownership.
    568 template <class M>
    569 class SigmaMatcher : public MatcherBase<typename M::Arc> {
    570  public:
    571   typedef typename M::FST FST;
    572   typedef typename M::Arc Arc;
    573   typedef typename Arc::StateId StateId;
    574   typedef typename Arc::Label Label;
    575   typedef typename Arc::Weight Weight;
    576 
    577   SigmaMatcher(const FST &fst,
    578                MatchType match_type,
    579                Label sigma_label = kNoLabel,
    580                MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    581                M *matcher = 0)
    582       : matcher_(matcher ? matcher : new M(fst, match_type)),
    583         match_type_(match_type),
    584         sigma_label_(sigma_label),
    585         error_(false) {
    586     if (match_type == MATCH_BOTH) {
    587       FSTERROR() << "SigmaMatcher: bad match type";
    588       match_type_ = MATCH_NONE;
    589       error_ = true;
    590     }
    591     if (sigma_label == 0) {
    592       FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
    593       sigma_label_ = kNoLabel;
    594       error_ = true;
    595     }
    596 
    597     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    598       rewrite_both_ = fst.Properties(kAcceptor, true);
    599     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    600       rewrite_both_ = true;
    601     else
    602       rewrite_both_ = false;
    603   }
    604 
    605   SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
    606       : matcher_(new M(*matcher.matcher_, safe)),
    607         match_type_(matcher.match_type_),
    608         sigma_label_(matcher.sigma_label_),
    609         rewrite_both_(matcher.rewrite_both_),
    610         error_(matcher.error_) {}
    611 
    612   virtual ~SigmaMatcher() {
    613     delete matcher_;
    614   }
    615 
    616   virtual SigmaMatcher<M> *Copy(bool safe = false) const {
    617     return new SigmaMatcher<M>(*this, safe);
    618   }
    619 
    620   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    621 
    622   void SetState(StateId s) {
    623     matcher_->SetState(s);
    624     has_sigma_ =
    625         sigma_label_ != kNoLabel ? matcher_->Find(sigma_label_) : false;
    626   }
    627 
    628   bool Find(Label match_label) {
    629     match_label_ = match_label;
    630     if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
    631       FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
    632       error_ = true;
    633       return false;
    634     }
    635     if (matcher_->Find(match_label)) {
    636       sigma_match_ = kNoLabel;
    637       return true;
    638     } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
    639                matcher_->Find(sigma_label_)) {
    640       sigma_match_ = match_label;
    641       return true;
    642     } else {
    643       return false;
    644     }
    645   }
    646 
    647   bool Done() const {
    648     return matcher_->Done();
    649   }
    650 
    651   const Arc& Value() const {
    652     if (sigma_match_ == kNoLabel) {
    653       return matcher_->Value();
    654     } else {
    655       sigma_arc_ = matcher_->Value();
    656       if (rewrite_both_) {
    657         if (sigma_arc_.ilabel == sigma_label_)
    658           sigma_arc_.ilabel = sigma_match_;
    659         if (sigma_arc_.olabel == sigma_label_)
    660           sigma_arc_.olabel = sigma_match_;
    661       } else if (match_type_ == MATCH_INPUT) {
    662         sigma_arc_.ilabel = sigma_match_;
    663       } else {
    664         sigma_arc_.olabel = sigma_match_;
    665       }
    666       return sigma_arc_;
    667     }
    668   }
    669 
    670   void Next() {
    671     matcher_->Next();
    672     if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
    673         (match_label_ > 0)) {
    674       matcher_->Find(sigma_label_);
    675       sigma_match_ = match_label_;
    676     }
    677   }
    678 
    679   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    680 
    681   virtual uint64 Properties(uint64 props) const;
    682 
    683   virtual uint32 Flags() const {
    684     if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE)
    685       return matcher_->Flags();
    686     // kRequireMatch temporarily disabled until issues
    687     // in //speech/gaudi/annotation/util/denorm are resolved.
    688     // return matcher_->Flags() | kRequireMatch;
    689     return matcher_->Flags();
    690   }
    691 
    692 private:
    693   virtual void SetState_(StateId s) { SetState(s); }
    694   virtual bool Find_(Label label) { return Find(label); }
    695   virtual bool Done_() const { return Done(); }
    696   virtual const Arc& Value_() const { return Value(); }
    697   virtual void Next_() { Next(); }
    698 
    699   M *matcher_;
    700   MatchType match_type_;   // Type of match requested
    701   Label sigma_label_;      // Label that represents the sigma transition
    702   bool rewrite_both_;      // Rewrite both sides when both are 'sigma_label_'
    703   bool has_sigma_;         // Are there sigmas at the current state?
    704   Label sigma_match_;      // Current label that matches sigma transition
    705   mutable Arc sigma_arc_;  // Arc to return when sigma match
    706   Label match_label_;      // Label being matched
    707   bool error_;             // Error encountered
    708 
    709   void operator=(const SigmaMatcher<M> &);  // disallow
    710 };
    711 
    712 template <class M> inline
    713 uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
    714   uint64 outprops = matcher_->Properties(inprops);
    715   if (error_) outprops |= kError;
    716 
    717   if (match_type_ == MATCH_NONE) {
    718     return outprops;
    719   } else if (rewrite_both_) {
    720     return outprops & ~(kIDeterministic | kNonIDeterministic |
    721                      kODeterministic | kNonODeterministic |
    722                      kILabelSorted | kNotILabelSorted |
    723                      kOLabelSorted | kNotOLabelSorted |
    724                      kString);
    725   } else if (match_type_ == MATCH_INPUT) {
    726     return outprops & ~(kIDeterministic | kNonIDeterministic |
    727                      kODeterministic | kNonODeterministic |
    728                      kILabelSorted | kNotILabelSorted |
    729                      kString | kAcceptor);
    730   } else if (match_type_ == MATCH_OUTPUT) {
    731     return outprops & ~(kIDeterministic | kNonIDeterministic |
    732                      kODeterministic | kNonODeterministic |
    733                      kOLabelSorted | kNotOLabelSorted |
    734                      kString | kAcceptor);
    735   } else {
    736     // Shouldn't ever get here.
    737     FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_;
    738     return 0;
    739   }
    740 }
    741 
    742 
    743 // For any requested label that doesn't match at a state, this matcher
    744 // considers the *unique* transition that matches the label 'phi_label'
    745 // (phi = 'fail'), and recursively looks for a match at its
    746 // destination.  When 'phi_loop' is true, if no match is found but a
    747 // phi self-loop is found, then the phi transition found is returned
    748 // with the phi_label rewritten as the requested label (both sides if
    749 // an acceptor, or if 'rewrite_both' is true and both input and output
    750 // labels of the found transition are 'phi_label').  If 'phi_label' is
    751 // kNoLabel, this special matching is not done.  PhiMatcher is
    752 // templated itself on a matcher, which is used to perform the
    753 // underlying matching.  By default, the underlying matcher is
    754 // constructed by PhiMatcher. The user can instead pass in this
    755 // object; in that case, PhiMatcher takes its ownership.
    756 // Warning: phi non-determinism not supported (for simplicity).
    757 template <class M>
    758 class PhiMatcher : public MatcherBase<typename M::Arc> {
    759  public:
    760   typedef typename M::FST FST;
    761   typedef typename M::Arc Arc;
    762   typedef typename Arc::StateId StateId;
    763   typedef typename Arc::Label Label;
    764   typedef typename Arc::Weight Weight;
    765 
    766   PhiMatcher(const FST &fst,
    767              MatchType match_type,
    768              Label phi_label = kNoLabel,
    769              bool phi_loop = true,
    770              MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    771              M *matcher = 0)
    772       : matcher_(matcher ? matcher : new M(fst, match_type)),
    773         match_type_(match_type),
    774         phi_label_(phi_label),
    775         state_(kNoStateId),
    776         phi_loop_(phi_loop),
    777         error_(false) {
    778     if (match_type == MATCH_BOTH) {
    779       FSTERROR() << "PhiMatcher: bad match type";
    780       match_type_ = MATCH_NONE;
    781       error_ = true;
    782     }
    783 
    784     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    785       rewrite_both_ = fst.Properties(kAcceptor, true);
    786     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    787       rewrite_both_ = true;
    788     else
    789       rewrite_both_ = false;
    790    }
    791 
    792   PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
    793       : matcher_(new M(*matcher.matcher_, safe)),
    794         match_type_(matcher.match_type_),
    795         phi_label_(matcher.phi_label_),
    796         rewrite_both_(matcher.rewrite_both_),
    797         state_(kNoStateId),
    798         phi_loop_(matcher.phi_loop_),
    799         error_(matcher.error_) {}
    800 
    801   virtual ~PhiMatcher() {
    802     delete matcher_;
    803   }
    804 
    805   virtual PhiMatcher<M> *Copy(bool safe = false) const {
    806     return new PhiMatcher<M>(*this, safe);
    807   }
    808 
    809   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    810 
    811   void SetState(StateId s) {
    812     matcher_->SetState(s);
    813     state_ = s;
    814     has_phi_ = phi_label_ != kNoLabel;
    815   }
    816 
    817   bool Find(Label match_label);
    818 
    819   bool Done() const { return matcher_->Done(); }
    820 
    821   const Arc& Value() const {
    822     if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
    823       return matcher_->Value();
    824     } else if (phi_match_ == 0) {  // Virtual epsilon loop
    825       phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
    826       if (match_type_ == MATCH_OUTPUT)
    827         swap(phi_arc_.ilabel, phi_arc_.olabel);
    828       return phi_arc_;
    829     } else {
    830       phi_arc_ = matcher_->Value();
    831       phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
    832       if (phi_match_ != kNoLabel) {  // Phi loop match
    833         if (rewrite_both_) {
    834           if (phi_arc_.ilabel == phi_label_)
    835             phi_arc_.ilabel = phi_match_;
    836           if (phi_arc_.olabel == phi_label_)
    837             phi_arc_.olabel = phi_match_;
    838         } else if (match_type_ == MATCH_INPUT) {
    839           phi_arc_.ilabel = phi_match_;
    840         } else {
    841           phi_arc_.olabel = phi_match_;
    842         }
    843       }
    844       return phi_arc_;
    845     }
    846   }
    847 
    848   void Next() { matcher_->Next(); }
    849 
    850   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    851 
    852   virtual uint64 Properties(uint64 props) const;
    853 
    854   virtual uint32 Flags() const {
    855     if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE)
    856       return matcher_->Flags();
    857     return matcher_->Flags() | kRequireMatch;
    858   }
    859 
    860 private:
    861   virtual void SetState_(StateId s) { SetState(s); }
    862   virtual bool Find_(Label label) { return Find(label); }
    863   virtual bool Done_() const { return Done(); }
    864   virtual const Arc& Value_() const { return Value(); }
    865   virtual void Next_() { Next(); }
    866 
    867   M *matcher_;
    868   MatchType match_type_;  // Type of match requested
    869   Label phi_label_;       // Label that represents the phi transition
    870   bool rewrite_both_;     // Rewrite both sides when both are 'phi_label_'
    871   bool has_phi_;          // Are there possibly phis at the current state?
    872   Label phi_match_;       // Current label that matches phi loop
    873   mutable Arc phi_arc_;   // Arc to return
    874   StateId state_;         // State where looking for matches
    875   Weight phi_weight_;     // Product of the weights of phi transitions taken
    876   bool phi_loop_;         // When true, phi self-loop are allowed and treated
    877                           // as rho (required for Aho-Corasick)
    878   bool error_;             // Error encountered
    879 
    880   void operator=(const PhiMatcher<M> &);  // disallow
    881 };
    882 
    883 template <class M> inline
    884 bool PhiMatcher<M>::Find(Label match_label) {
    885   if (match_label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
    886     FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
    887     error_ = true;
    888     return false;
    889   }
    890   matcher_->SetState(state_);
    891   phi_match_ = kNoLabel;
    892   phi_weight_ = Weight::One();
    893   if (phi_label_ == 0) {          // When 'phi_label_ == 0',
    894     if (match_label == kNoLabel)  // there are no more true epsilon arcs,
    895       return false;
    896     if (match_label == 0) {       // but virtual eps loop need to be returned
    897       if (!matcher_->Find(kNoLabel)) {
    898         return matcher_->Find(0);
    899       } else {
    900         phi_match_ = 0;
    901         return true;
    902       }
    903     }
    904   }
    905   if (!has_phi_ || match_label == 0 || match_label == kNoLabel)
    906     return matcher_->Find(match_label);
    907   StateId state = state_;
    908   while (!matcher_->Find(match_label)) {
    909     // Look for phi transition (if phi_label_ == 0, we need to look
    910     // for -1 to avoid getting the virtual self-loop)
    911     if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_))
    912       return false;
    913     if (phi_loop_ && matcher_->Value().nextstate == state) {
    914       phi_match_ = match_label;
    915       return true;
    916     }
    917     phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
    918     state = matcher_->Value().nextstate;
    919     matcher_->Next();
    920     if (!matcher_->Done()) {
    921       FSTERROR() << "PhiMatcher: phi non-determinism not supported";
    922       error_ = true;
    923     }
    924     matcher_->SetState(state);
    925   }
    926   return true;
    927 }
    928 
    929 template <class M> inline
    930 uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
    931   uint64 outprops = matcher_->Properties(inprops);
    932   if (error_) outprops |= kError;
    933 
    934   if (match_type_ == MATCH_NONE) {
    935     return outprops;
    936   } else if (match_type_ == MATCH_INPUT) {
    937     if (phi_label_ == 0) {
    938       outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
    939       outprops |= kNoEpsilons | kNoIEpsilons;
    940     }
    941     if (rewrite_both_) {
    942       return outprops & ~(kODeterministic | kNonODeterministic | kString |
    943                        kILabelSorted | kNotILabelSorted |
    944                        kOLabelSorted | kNotOLabelSorted);
    945     } else {
    946       return outprops & ~(kODeterministic | kAcceptor | kString |
    947                        kILabelSorted | kNotILabelSorted |
    948                        kOLabelSorted | kNotOLabelSorted);
    949     }
    950   } else if (match_type_ == MATCH_OUTPUT) {
    951     if (phi_label_ == 0) {
    952       outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
    953       outprops |= kNoEpsilons | kNoOEpsilons;
    954     }
    955     if (rewrite_both_) {
    956       return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
    957                        kILabelSorted | kNotILabelSorted |
    958                        kOLabelSorted | kNotOLabelSorted);
    959     } else {
    960       return outprops & ~(kIDeterministic | kAcceptor | kString |
    961                        kILabelSorted | kNotILabelSorted |
    962                        kOLabelSorted | kNotOLabelSorted);
    963     }
    964   } else {
    965     // Shouldn't ever get here.
    966     FSTERROR() << "PhiMatcher:: bad match type: " << match_type_;
    967     return 0;
    968   }
    969 }
    970 
    971 
    972 //
    973 // MULTI-EPS MATCHER FLAGS
    974 //
    975 
    976 // Return multi-epsilon arcs for Find(kNoLabel).
    977 const uint32 kMultiEpsList =  0x00000001;
    978 
    979 // Return a kNolabel loop for Find(multi_eps).
    980 const uint32 kMultiEpsLoop =  0x00000002;
    981 
    982 // MultiEpsMatcher: allows treating multiple non-0 labels as
    983 // non-consuming labels in addition to 0 that is always
    984 // non-consuming. Precise behavior controlled by 'flags' argument. By
    985 // default, the underlying matcher is constructed by
    986 // MultiEpsMatcher. The user can instead pass in this object; in that
    987 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
    988 // true.
    989 template <class M>
    990 class MultiEpsMatcher {
    991  public:
    992   typedef typename M::FST FST;
    993   typedef typename M::Arc Arc;
    994   typedef typename Arc::StateId StateId;
    995   typedef typename Arc::Label Label;
    996   typedef typename Arc::Weight Weight;
    997 
    998   MultiEpsMatcher(const FST &fst, MatchType match_type,
    999                   uint32 flags = (kMultiEpsLoop | kMultiEpsList),
   1000                   M *matcher = 0, bool own_matcher = true)
   1001       : matcher_(matcher ? matcher : new M(fst, match_type)),
   1002         flags_(flags),
   1003         own_matcher_(matcher ?  own_matcher : true) {
   1004     if (match_type == MATCH_INPUT) {
   1005       loop_.ilabel = kNoLabel;
   1006       loop_.olabel = 0;
   1007     } else {
   1008       loop_.ilabel = 0;
   1009       loop_.olabel = kNoLabel;
   1010     }
   1011     loop_.weight = Weight::One();
   1012     loop_.nextstate = kNoStateId;
   1013   }
   1014 
   1015   MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
   1016       : matcher_(new M(*matcher.matcher_, safe)),
   1017         flags_(matcher.flags_),
   1018         own_matcher_(true),
   1019         multi_eps_labels_(matcher.multi_eps_labels_),
   1020         loop_(matcher.loop_) {
   1021     loop_.nextstate = kNoStateId;
   1022   }
   1023 
   1024   ~MultiEpsMatcher() {
   1025     if (own_matcher_)
   1026       delete matcher_;
   1027   }
   1028 
   1029   MultiEpsMatcher<M> *Copy(bool safe = false) const {
   1030     return new MultiEpsMatcher<M>(*this, safe);
   1031   }
   1032 
   1033   MatchType Type(bool test) const { return matcher_->Type(test); }
   1034 
   1035   void SetState(StateId s) {
   1036     matcher_->SetState(s);
   1037     loop_.nextstate = s;
   1038   }
   1039 
   1040   bool Find(Label match_label);
   1041 
   1042   bool Done() const {
   1043     return done_;
   1044   }
   1045 
   1046   const Arc& Value() const {
   1047     return current_loop_ ? loop_ : matcher_->Value();
   1048   }
   1049 
   1050   void Next() {
   1051     if (!current_loop_) {
   1052       matcher_->Next();
   1053       done_ = matcher_->Done();
   1054       if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
   1055         ++multi_eps_iter_;
   1056         while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
   1057                !matcher_->Find(*multi_eps_iter_))
   1058           ++multi_eps_iter_;
   1059         if (multi_eps_iter_ != multi_eps_labels_.End())
   1060           done_ = false;
   1061         else
   1062           done_ = !matcher_->Find(kNoLabel);
   1063 
   1064       }
   1065     } else {
   1066       done_ = true;
   1067     }
   1068   }
   1069 
   1070   const FST &GetFst() const { return matcher_->GetFst(); }
   1071 
   1072   uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
   1073 
   1074   uint32 Flags() const { return matcher_->Flags(); }
   1075 
   1076   void AddMultiEpsLabel(Label label) {
   1077     if (label == 0) {
   1078       FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
   1079     } else {
   1080       multi_eps_labels_.Insert(label);
   1081     }
   1082   }
   1083 
   1084   void RemoveMultiEpsLabel(Label label) {
   1085     if (label == 0) {
   1086       FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
   1087     } else {
   1088       multi_eps_labels_.Erase(label);
   1089     }
   1090   }
   1091 
   1092   void ClearMultiEpsLabels() {
   1093     multi_eps_labels_.Clear();
   1094   }
   1095 
   1096 private:
   1097   M *matcher_;
   1098   uint32 flags_;
   1099   bool own_matcher_;             // Does this class delete the matcher?
   1100 
   1101   // Multi-eps label set
   1102   CompactSet<Label, kNoLabel> multi_eps_labels_;
   1103   typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
   1104 
   1105   bool current_loop_;            // Current arc is the implicit loop
   1106   mutable Arc loop_;             // For non-consuming symbols
   1107   bool done_;                    // Matching done
   1108 
   1109   void operator=(const MultiEpsMatcher<M> &);  // Disallow
   1110 };
   1111 
   1112 template <class M> inline
   1113 bool MultiEpsMatcher<M>::Find(Label match_label) {
   1114   multi_eps_iter_ = multi_eps_labels_.End();
   1115   current_loop_ = false;
   1116   bool ret;
   1117   if (match_label == 0) {
   1118     ret = matcher_->Find(0);
   1119   } else if (match_label == kNoLabel) {
   1120     if (flags_ & kMultiEpsList) {
   1121       // return all non-consuming arcs (incl. epsilon)
   1122       multi_eps_iter_ = multi_eps_labels_.Begin();
   1123       while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
   1124              !matcher_->Find(*multi_eps_iter_))
   1125         ++multi_eps_iter_;
   1126       if (multi_eps_iter_ != multi_eps_labels_.End())
   1127         ret = true;
   1128       else
   1129         ret = matcher_->Find(kNoLabel);
   1130     } else {
   1131       // return all epsilon arcs
   1132       ret = matcher_->Find(kNoLabel);
   1133     }
   1134   } else if ((flags_ & kMultiEpsLoop) &&
   1135              multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) {
   1136     // return 'implicit' loop
   1137     current_loop_ = true;
   1138     ret = true;
   1139   } else {
   1140     ret = matcher_->Find(match_label);
   1141   }
   1142   done_ = !ret;
   1143   return ret;
   1144 }
   1145 
   1146 
   1147 // Generic matcher, templated on the FST definition
   1148 // - a wrapper around pointer to specific one.
   1149 // Here is a typical use: \code
   1150 //   Matcher<StdFst> matcher(fst, MATCH_INPUT);
   1151 //   matcher.SetState(state);
   1152 //   if (matcher.Find(label))
   1153 //     for (; !matcher.Done(); matcher.Next()) {
   1154 //       StdArc &arc = matcher.Value();
   1155 //       ...
   1156 //     } \endcode
   1157 template <class F>
   1158 class Matcher {
   1159  public:
   1160   typedef F FST;
   1161   typedef typename F::Arc Arc;
   1162   typedef typename Arc::StateId StateId;
   1163   typedef typename Arc::Label Label;
   1164   typedef typename Arc::Weight Weight;
   1165 
   1166   Matcher(const F &fst, MatchType match_type) {
   1167     base_ = fst.InitMatcher(match_type);
   1168     if (!base_)
   1169       base_ = new SortedMatcher<F>(fst, match_type);
   1170   }
   1171 
   1172   Matcher(const Matcher<F> &matcher, bool safe = false) {
   1173     base_ = matcher.base_->Copy(safe);
   1174   }
   1175 
   1176   // Takes ownership of the provided matcher
   1177   Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; }
   1178 
   1179   ~Matcher() { delete base_; }
   1180 
   1181   Matcher<F> *Copy(bool safe = false) const {
   1182     return new Matcher<F>(*this, safe);
   1183   }
   1184 
   1185   MatchType Type(bool test) const { return base_->Type(test); }
   1186   void SetState(StateId s) { base_->SetState(s); }
   1187   bool Find(Label label) { return base_->Find(label); }
   1188   bool Done() const { return base_->Done(); }
   1189   const Arc& Value() const { return base_->Value(); }
   1190   void Next() { base_->Next(); }
   1191   const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
   1192   uint64 Properties(uint64 props) const { return base_->Properties(props); }
   1193   uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
   1194 
   1195  private:
   1196   MatcherBase<Arc> *base_;
   1197 
   1198   void operator=(const Matcher<Arc> &);  // disallow
   1199 };
   1200 
   1201 }  // namespace fst
   1202 
   1203 
   1204 
   1205 #endif  // FST_LIB_MATCHER_H__
   1206