Home | History | Annotate | Download | only in fst
      1 // matcher.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Classes to allow matching labels leaving FST states.
     20 
     21 #ifndef FST_LIB_MATCHER_H__
     22 #define FST_LIB_MATCHER_H__
     23 
     24 #include <algorithm>
     25 #include <set>
     26 
     27 #include <fst/mutable-fst.h>  // for all internal FST accessors
     28 
     29 
     30 namespace fst {
     31 
     32 // MATCHERS - these can find and iterate through requested labels at
     33 // FST states. In the simplest form, these are just some associative
     34 // map or search keyed on labels. More generally, they may
     35 // implement matching special labels that represent sets of labels
     36 // such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
     37 // The Matcher interface is:
     38 //
     39 // template <class F>
     40 // class Matcher {
     41 //  public:
     42 //   typedef F FST;
     43 //   typedef F::Arc Arc;
     44 //   typedef typename Arc::StateId StateId;
     45 //   typedef typename Arc::Label Label;
     46 //   typedef typename Arc::Weight Weight;
     47 //
     48 //   // Required constructors.
     49 //   Matcher(const F &fst, MatchType type);
     50 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
     51 //   // for further doc.
     52 //   Matcher(const Matcher &matcher, bool safe = false);
     53 //
     54 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
     55 //   // for further doc.
     56 //   Matcher<F> *Copy(bool safe = false) const;
     57 //
     58 //   // Returns the match type that can be provided (depending on
     59 //   // compatibility of the input FST). It is either
     60 //   // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
     61 //   // If 'test' is false, a constant time test is performed, but
     62 //   // MATCH_UNKNOWN may be returned. If 'test' is true,
     63 //   // a definite answer is returned, but may involve more costly
     64 //   // computation (e.g., visiting the Fst).
     65 //   MatchType Type(bool test) const;
     66 //   // Specifies the current state.
     67 //   void SetState(StateId s);
     68 //
     69 //   // This finds matches to a label at the current state.
     70 //   // Returns true if a match found. kNoLabel matches any
     71 //   // 'non-consuming' transitions, e.g., epsilon transitions,
     72 //   // which do not require a matching symbol.
     73 //   bool Find(Label label);
     74 //   // These iterate through any matches found:
     75 //   bool Done() const;         // No more matches.
     76 //   const A& Value() const;    // Current arc (when !Done)
     77 //   void Next();               // Advance to next arc (when !Done)
     78 //
     79 //   // Return matcher FST.
     80 //   const F& GetFst() const;
     81 //   // This specifies the known Fst properties as viewed from this
     82 //   // matcher. It takes as argument the input Fst's known properties.
     83 //   uint64 Properties(uint64 props) const;
     84 // };
     85 
     86 // Flags used for basic matchers (see also lookahead.h).
     87 const uint32 kMatcherFlags = 0x00000000;
     88 
     89 // Matcher interface, templated on the Arc definition; used
     90 // for matcher specializations that are returned by the
     91 // InitMatcher Fst method.
     92 template <class A>
     93 class MatcherBase {
     94  public:
     95   typedef A Arc;
     96   typedef typename A::StateId StateId;
     97   typedef typename A::Label Label;
     98   typedef typename A::Weight Weight;
     99 
    100   virtual ~MatcherBase() {}
    101 
    102   virtual MatcherBase<A> *Copy(bool safe = false) const = 0;
    103   virtual MatchType Type(bool test) const = 0;
    104   void SetState(StateId s) { SetState_(s); }
    105   bool Find(Label label) { return Find_(label); }
    106   bool Done() const { return Done_(); }
    107   const A& Value() const { return Value_(); }
    108   void Next() { Next_(); }
    109   virtual const Fst<A> &GetFst() const = 0;
    110   virtual uint64 Properties(uint64 props) const = 0;
    111   virtual uint32 Flags() const { return 0; }
    112  private:
    113   virtual void SetState_(StateId s) = 0;
    114   virtual bool Find_(Label label) = 0;
    115   virtual bool Done_() const = 0;
    116   virtual const A& Value_() const  = 0;
    117   virtual void Next_()  = 0;
    118 };
    119 
    120 
    121 // A matcher that expects sorted labels on the side to be matched.
    122 // If match_type == MATCH_INPUT, epsilons match the implicit self loop
    123 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
    124 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
    125 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
    126 template <class F>
    127 class SortedMatcher : public MatcherBase<typename F::Arc> {
    128  public:
    129   typedef F FST;
    130   typedef typename F::Arc Arc;
    131   typedef typename Arc::StateId StateId;
    132   typedef typename Arc::Label Label;
    133   typedef typename Arc::Weight Weight;
    134 
    135   // Labels >= binary_label will be searched for by binary search,
    136   // o.w. linear search is used.
    137   SortedMatcher(const F &fst, MatchType match_type,
    138                 Label binary_label = 1)
    139       : fst_(fst.Copy()),
    140         s_(kNoStateId),
    141         aiter_(0),
    142         match_type_(match_type),
    143         binary_label_(binary_label),
    144         match_label_(kNoLabel),
    145         narcs_(0),
    146         loop_(kNoLabel, 0, Weight::One(), kNoStateId),
    147         error_(false) {
    148     switch(match_type_) {
    149       case MATCH_INPUT:
    150       case MATCH_NONE:
    151         break;
    152       case MATCH_OUTPUT:
    153         swap(loop_.ilabel, loop_.olabel);
    154         break;
    155       default:
    156         FSTERROR() << "SortedMatcher: bad match type";
    157         match_type_ = MATCH_NONE;
    158         error_ = true;
    159     }
    160   }
    161 
    162   SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false)
    163       : fst_(matcher.fst_->Copy(safe)),
    164         s_(kNoStateId),
    165         aiter_(0),
    166         match_type_(matcher.match_type_),
    167         binary_label_(matcher.binary_label_),
    168         match_label_(kNoLabel),
    169         narcs_(0),
    170         loop_(matcher.loop_),
    171         error_(matcher.error_) {}
    172 
    173   virtual ~SortedMatcher() {
    174     if (aiter_)
    175       delete aiter_;
    176     delete fst_;
    177   }
    178 
    179   virtual SortedMatcher<F> *Copy(bool safe = false) const {
    180     return new SortedMatcher<F>(*this, safe);
    181   }
    182 
    183   virtual MatchType Type(bool test) const {
    184     if (match_type_ == MATCH_NONE)
    185       return match_type_;
    186 
    187     uint64 true_prop =  match_type_ == MATCH_INPUT ?
    188         kILabelSorted : kOLabelSorted;
    189     uint64 false_prop = match_type_ == MATCH_INPUT ?
    190         kNotILabelSorted : kNotOLabelSorted;
    191     uint64 props = fst_->Properties(true_prop | false_prop, test);
    192 
    193     if (props & true_prop)
    194       return match_type_;
    195     else if (props & false_prop)
    196       return MATCH_NONE;
    197     else
    198       return MATCH_UNKNOWN;
    199   }
    200 
    201   void SetState(StateId s) {
    202     if (s_ == s)
    203       return;
    204     s_ = s;
    205     if (match_type_ == MATCH_NONE) {
    206       FSTERROR() << "SortedMatcher: bad match type";
    207       error_ = true;
    208     }
    209     if (aiter_)
    210       delete aiter_;
    211     aiter_ = new ArcIterator<F>(*fst_, s);
    212     aiter_->SetFlags(kArcNoCache, kArcNoCache);
    213     narcs_ = internal::NumArcs(*fst_, s);
    214     loop_.nextstate = s;
    215   }
    216 
    217   bool Find(Label match_label);
    218 
    219   bool Done() const {
    220     if (current_loop_)
    221       return false;
    222     if (aiter_->Done())
    223       return true;
    224     aiter_->SetFlags(
    225       match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
    226       kArcValueFlags);
    227     Label label = match_type_ == MATCH_INPUT ?
    228         aiter_->Value().ilabel : aiter_->Value().olabel;
    229     return label != match_label_;
    230   }
    231 
    232   const Arc& Value() const {
    233     if (current_loop_) {
    234       return loop_;
    235     }
    236     aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
    237     return aiter_->Value();
    238   }
    239 
    240   void Next() {
    241     if (current_loop_)
    242       current_loop_ = false;
    243     else
    244       aiter_->Next();
    245   }
    246 
    247   virtual const F &GetFst() const { return *fst_; }
    248 
    249   virtual uint64 Properties(uint64 inprops) const {
    250     uint64 outprops = inprops;
    251     if (error_) outprops |= kError;
    252     return outprops;
    253   }
    254 
    255  private:
    256   virtual void SetState_(StateId s) { SetState(s); }
    257   virtual bool Find_(Label label) { return Find(label); }
    258   virtual bool Done_() const { return Done(); }
    259   virtual const Arc& Value_() const { return Value(); }
    260   virtual void Next_() { Next(); }
    261 
    262   const F *fst_;
    263   StateId s_;                     // Current state
    264   ArcIterator<F> *aiter_;         // Iterator for current state
    265   MatchType match_type_;          // Type of match to perform
    266   Label binary_label_;            // Least label for binary search
    267   Label match_label_;             // Current label to be matched
    268   size_t narcs_;                  // Current state arc count
    269   Arc loop_;                      // For non-consuming symbols
    270   bool current_loop_;             // Current arc is the implicit loop
    271   bool error_;                    // Error encountered
    272 
    273   void operator=(const SortedMatcher<F> &);  // Disallow
    274 };
    275 
    276 template <class F> inline
    277 bool SortedMatcher<F>::Find(Label match_label) {
    278   if (error_) {
    279     current_loop_ = false;
    280     match_label_ = kNoLabel;
    281     return false;
    282   }
    283   current_loop_ = match_label == 0;
    284   match_label_ = match_label == kNoLabel ? 0 : match_label;
    285   aiter_->SetFlags(
    286       match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
    287       kArcValueFlags);
    288   if (match_label_ >= binary_label_) {
    289     // Binary search for match.
    290     size_t low = 0;
    291     size_t high = narcs_;
    292     while (low < high) {
    293       size_t mid = (low + high) / 2;
    294       aiter_->Seek(mid);
    295       Label label = match_type_ == MATCH_INPUT ?
    296           aiter_->Value().ilabel : aiter_->Value().olabel;
    297       if (label > match_label_) {
    298         high = mid;
    299       } else if (label < match_label_) {
    300         low = mid + 1;
    301       } else {
    302         // find first matching label (when non-determinism)
    303         for (size_t i = mid; i > low; --i) {
    304           aiter_->Seek(i - 1);
    305           label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel :
    306               aiter_->Value().olabel;
    307           if (label != match_label_) {
    308             aiter_->Seek(i);
    309             return true;
    310           }
    311         }
    312         return true;
    313       }
    314     }
    315     return current_loop_;
    316   } else {
    317     // Linear search for match.
    318     for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
    319       Label label = match_type_ == MATCH_INPUT ?
    320           aiter_->Value().ilabel : aiter_->Value().olabel;
    321       if (label == match_label_) {
    322         return true;
    323       }
    324       if (label > match_label_)
    325         break;
    326     }
    327     return current_loop_;
    328   }
    329 }
    330 
    331 
    332 // Specifies whether during matching we rewrite both the input and output sides.
    333 enum MatcherRewriteMode {
    334   MATCHER_REWRITE_AUTO = 0,    // Rewrites both sides iff acceptor.
    335   MATCHER_REWRITE_ALWAYS,
    336   MATCHER_REWRITE_NEVER
    337 };
    338 
    339 
    340 // For any requested label that doesn't match at a state, this matcher
    341 // considers all transitions that match the label 'rho_label' (rho =
    342 // 'rest').  Each such rho transition found is returned with the
    343 // rho_label rewritten as the requested label (both sides if an
    344 // acceptor, or if 'rewrite_both' is true and both input and output
    345 // labels of the found transition are 'rho_label').  If 'rho_label' is
    346 // kNoLabel, this special matching is not done.  RhoMatcher is
    347 // templated itself on a matcher, which is used to perform the
    348 // underlying matching.  By default, the underlying matcher is
    349 // constructed by RhoMatcher.  The user can instead pass in this
    350 // object; in that case, RhoMatcher takes its ownership.
    351 template <class M>
    352 class RhoMatcher : public MatcherBase<typename M::Arc> {
    353  public:
    354   typedef typename M::FST FST;
    355   typedef typename M::Arc Arc;
    356   typedef typename Arc::StateId StateId;
    357   typedef typename Arc::Label Label;
    358   typedef typename Arc::Weight Weight;
    359 
    360   RhoMatcher(const FST &fst,
    361              MatchType match_type,
    362              Label rho_label = kNoLabel,
    363              MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    364              M *matcher = 0)
    365       : matcher_(matcher ? matcher : new M(fst, match_type)),
    366         match_type_(match_type),
    367         rho_label_(rho_label),
    368         error_(false) {
    369     if (match_type == MATCH_BOTH) {
    370       FSTERROR() << "RhoMatcher: bad match type";
    371       match_type_ = MATCH_NONE;
    372       error_ = true;
    373     }
    374     if (rho_label == 0) {
    375       FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
    376       rho_label_ = kNoLabel;
    377       error_ = true;
    378     }
    379 
    380     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    381       rewrite_both_ = fst.Properties(kAcceptor, true);
    382     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    383       rewrite_both_ = true;
    384     else
    385       rewrite_both_ = false;
    386   }
    387 
    388   RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
    389       : matcher_(new M(*matcher.matcher_, safe)),
    390         match_type_(matcher.match_type_),
    391         rho_label_(matcher.rho_label_),
    392         rewrite_both_(matcher.rewrite_both_),
    393         error_(matcher.error_) {}
    394 
    395   virtual ~RhoMatcher() {
    396     delete matcher_;
    397   }
    398 
    399   virtual RhoMatcher<M> *Copy(bool safe = false) const {
    400     return new RhoMatcher<M>(*this, safe);
    401   }
    402 
    403   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    404 
    405   void SetState(StateId s) {
    406     matcher_->SetState(s);
    407     has_rho_ = rho_label_ != kNoLabel;
    408   }
    409 
    410   bool Find(Label match_label) {
    411     if (match_label == rho_label_ && rho_label_ != kNoLabel) {
    412       FSTERROR() << "RhoMatcher::Find: bad label (rho)";
    413       error_ = true;
    414       return false;
    415     }
    416     if (matcher_->Find(match_label)) {
    417       rho_match_ = kNoLabel;
    418       return true;
    419     } else if (has_rho_ && match_label != 0 && match_label != kNoLabel &&
    420                (has_rho_ = matcher_->Find(rho_label_))) {
    421       rho_match_ = match_label;
    422       return true;
    423     } else {
    424       return false;
    425     }
    426   }
    427 
    428   bool Done() const { return matcher_->Done(); }
    429 
    430   const Arc& Value() const {
    431     if (rho_match_ == kNoLabel) {
    432       return matcher_->Value();
    433     } else {
    434       rho_arc_ = matcher_->Value();
    435       if (rewrite_both_) {
    436         if (rho_arc_.ilabel == rho_label_)
    437           rho_arc_.ilabel = rho_match_;
    438         if (rho_arc_.olabel == rho_label_)
    439           rho_arc_.olabel = rho_match_;
    440       } else if (match_type_ == MATCH_INPUT) {
    441         rho_arc_.ilabel = rho_match_;
    442       } else {
    443         rho_arc_.olabel = rho_match_;
    444       }
    445       return rho_arc_;
    446     }
    447   }
    448 
    449   void Next() { matcher_->Next(); }
    450 
    451   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    452 
    453   virtual uint64 Properties(uint64 props) const;
    454 
    455  private:
    456   virtual void SetState_(StateId s) { SetState(s); }
    457   virtual bool Find_(Label label) { return Find(label); }
    458   virtual bool Done_() const { return Done(); }
    459   virtual const Arc& Value_() const { return Value(); }
    460   virtual void Next_() { Next(); }
    461 
    462   M *matcher_;
    463   MatchType match_type_;  // Type of match requested
    464   Label rho_label_;       // Label that represents the rho transition
    465   bool rewrite_both_;     // Rewrite both sides when both are 'rho_label_'
    466   bool has_rho_;          // Are there possibly rhos at the current state?
    467   Label rho_match_;       // Current label that matches rho transition
    468   mutable Arc rho_arc_;   // Arc to return when rho match
    469   bool error_;            // Error encountered
    470 
    471   void operator=(const RhoMatcher<M> &);  // Disallow
    472 };
    473 
    474 template <class M> inline
    475 uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
    476   uint64 outprops = matcher_->Properties(inprops);
    477   if (error_) outprops |= kError;
    478 
    479   if (match_type_ == MATCH_NONE) {
    480     return outprops;
    481   } else if (match_type_ == MATCH_INPUT) {
    482     if (rewrite_both_) {
    483       return outprops & ~(kODeterministic | kNonODeterministic | kString |
    484                        kILabelSorted | kNotILabelSorted |
    485                        kOLabelSorted | kNotOLabelSorted);
    486     } else {
    487       return outprops & ~(kODeterministic | kAcceptor | kString |
    488                        kILabelSorted | kNotILabelSorted);
    489     }
    490   } else if (match_type_ == MATCH_OUTPUT) {
    491     if (rewrite_both_) {
    492       return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
    493                        kILabelSorted | kNotILabelSorted |
    494                        kOLabelSorted | kNotOLabelSorted);
    495     } else {
    496       return outprops & ~(kIDeterministic | kAcceptor | kString |
    497                        kOLabelSorted | kNotOLabelSorted);
    498     }
    499   } else {
    500     // Shouldn't ever get here.
    501     FSTERROR() << "RhoMatcher:: bad match type: " << match_type_;
    502     return 0;
    503   }
    504 }
    505 
    506 
    507 // For any requested label, this matcher considers all transitions
    508 // that match the label 'sigma_label' (sigma = "any"), and this in
    509 // additions to transitions with the requested label.  Each such sigma
    510 // transition found is returned with the sigma_label rewritten as the
    511 // requested label (both sides if an acceptor, or if 'rewrite_both' is
    512 // true and both input and output labels of the found transition are
    513 // 'sigma_label').  If 'sigma_label' is kNoLabel, this special
    514 // matching is not done.  SigmaMatcher is templated itself on a
    515 // matcher, which is used to perform the underlying matching.  By
    516 // default, the underlying matcher is constructed by SigmaMatcher.
    517 // The user can instead pass in this object; in that case,
    518 // SigmaMatcher takes its ownership.
    519 template <class M>
    520 class SigmaMatcher : public MatcherBase<typename M::Arc> {
    521  public:
    522   typedef typename M::FST FST;
    523   typedef typename M::Arc Arc;
    524   typedef typename Arc::StateId StateId;
    525   typedef typename Arc::Label Label;
    526   typedef typename Arc::Weight Weight;
    527 
    528   SigmaMatcher(const FST &fst,
    529                MatchType match_type,
    530                Label sigma_label = kNoLabel,
    531                MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    532                M *matcher = 0)
    533       : matcher_(matcher ? matcher : new M(fst, match_type)),
    534         match_type_(match_type),
    535         sigma_label_(sigma_label),
    536         error_(false) {
    537     if (match_type == MATCH_BOTH) {
    538       FSTERROR() << "SigmaMatcher: bad match type";
    539       match_type_ = MATCH_NONE;
    540       error_ = true;
    541     }
    542     if (sigma_label == 0) {
    543       FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
    544       sigma_label_ = kNoLabel;
    545       error_ = true;
    546     }
    547 
    548     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    549       rewrite_both_ = fst.Properties(kAcceptor, true);
    550     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    551       rewrite_both_ = true;
    552     else
    553       rewrite_both_ = false;
    554   }
    555 
    556   SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
    557       : matcher_(new M(*matcher.matcher_, safe)),
    558         match_type_(matcher.match_type_),
    559         sigma_label_(matcher.sigma_label_),
    560         rewrite_both_(matcher.rewrite_both_),
    561         error_(matcher.error_) {}
    562 
    563   virtual ~SigmaMatcher() {
    564     delete matcher_;
    565   }
    566 
    567   virtual SigmaMatcher<M> *Copy(bool safe = false) const {
    568     return new SigmaMatcher<M>(*this, safe);
    569   }
    570 
    571   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    572 
    573   void SetState(StateId s) {
    574     matcher_->SetState(s);
    575     has_sigma_ =
    576         sigma_label_ != kNoLabel ? matcher_->Find(sigma_label_) : false;
    577   }
    578 
    579   bool Find(Label match_label) {
    580     match_label_ = match_label;
    581     if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
    582       FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
    583       error_ = true;
    584       return false;
    585     }
    586     if (matcher_->Find(match_label)) {
    587       sigma_match_ = kNoLabel;
    588       return true;
    589     } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
    590                matcher_->Find(sigma_label_)) {
    591       sigma_match_ = match_label;
    592       return true;
    593     } else {
    594       return false;
    595     }
    596   }
    597 
    598   bool Done() const {
    599     return matcher_->Done();
    600   }
    601 
    602   const Arc& Value() const {
    603     if (sigma_match_ == kNoLabel) {
    604       return matcher_->Value();
    605     } else {
    606       sigma_arc_ = matcher_->Value();
    607       if (rewrite_both_) {
    608         if (sigma_arc_.ilabel == sigma_label_)
    609           sigma_arc_.ilabel = sigma_match_;
    610         if (sigma_arc_.olabel == sigma_label_)
    611           sigma_arc_.olabel = sigma_match_;
    612       } else if (match_type_ == MATCH_INPUT) {
    613         sigma_arc_.ilabel = sigma_match_;
    614       } else {
    615         sigma_arc_.olabel = sigma_match_;
    616       }
    617       return sigma_arc_;
    618     }
    619   }
    620 
    621   void Next() {
    622     matcher_->Next();
    623     if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
    624         (match_label_ > 0)) {
    625       matcher_->Find(sigma_label_);
    626       sigma_match_ = match_label_;
    627     }
    628   }
    629 
    630   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    631 
    632   virtual uint64 Properties(uint64 props) const;
    633 
    634 private:
    635   virtual void SetState_(StateId s) { SetState(s); }
    636   virtual bool Find_(Label label) { return Find(label); }
    637   virtual bool Done_() const { return Done(); }
    638   virtual const Arc& Value_() const { return Value(); }
    639   virtual void Next_() { Next(); }
    640 
    641   M *matcher_;
    642   MatchType match_type_;   // Type of match requested
    643   Label sigma_label_;      // Label that represents the sigma transition
    644   bool rewrite_both_;      // Rewrite both sides when both are 'sigma_label_'
    645   bool has_sigma_;         // Are there sigmas at the current state?
    646   Label sigma_match_;      // Current label that matches sigma transition
    647   mutable Arc sigma_arc_;  // Arc to return when sigma match
    648   Label match_label_;      // Label being matched
    649   bool error_;             // Error encountered
    650 
    651   void operator=(const SigmaMatcher<M> &);  // disallow
    652 };
    653 
    654 template <class M> inline
    655 uint64 SigmaMatcher<M>::Properties(uint64 inprops) const {
    656   uint64 outprops = matcher_->Properties(inprops);
    657   if (error_) outprops |= kError;
    658 
    659   if (match_type_ == MATCH_NONE) {
    660     return outprops;
    661   } else if (rewrite_both_) {
    662     return outprops & ~(kIDeterministic | kNonIDeterministic |
    663                      kODeterministic | kNonODeterministic |
    664                      kILabelSorted | kNotILabelSorted |
    665                      kOLabelSorted | kNotOLabelSorted |
    666                      kString);
    667   } else if (match_type_ == MATCH_INPUT) {
    668     return outprops & ~(kIDeterministic | kNonIDeterministic |
    669                      kODeterministic | kNonODeterministic |
    670                      kILabelSorted | kNotILabelSorted |
    671                      kString | kAcceptor);
    672   } else if (match_type_ == MATCH_OUTPUT) {
    673     return outprops & ~(kIDeterministic | kNonIDeterministic |
    674                      kODeterministic | kNonODeterministic |
    675                      kOLabelSorted | kNotOLabelSorted |
    676                      kString | kAcceptor);
    677   } else {
    678     // Shouldn't ever get here.
    679     FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_;
    680     return 0;
    681   }
    682 }
    683 
    684 
    685 // For any requested label that doesn't match at a state, this matcher
    686 // considers the *unique* transition that matches the label 'phi_label'
    687 // (phi = 'fail'), and recursively looks for a match at its
    688 // destination.  When 'phi_loop' is true, if no match is found but a
    689 // phi self-loop is found, then the phi transition found is returned
    690 // with the phi_label rewritten as the requested label (both sides if
    691 // an acceptor, or if 'rewrite_both' is true and both input and output
    692 // labels of the found transition are 'phi_label').  If 'phi_label' is
    693 // kNoLabel, this special matching is not done.  PhiMatcher is
    694 // templated itself on a matcher, which is used to perform the
    695 // underlying matching.  By default, the underlying matcher is
    696 // constructed by PhiMatcher. The user can instead pass in this
    697 // object; in that case, PhiMatcher takes its ownership.
    698 // Warning: phi non-determinism not supported (for simplicity).
    699 template <class M>
    700 class PhiMatcher : public MatcherBase<typename M::Arc> {
    701  public:
    702   typedef typename M::FST FST;
    703   typedef typename M::Arc Arc;
    704   typedef typename Arc::StateId StateId;
    705   typedef typename Arc::Label Label;
    706   typedef typename Arc::Weight Weight;
    707 
    708   PhiMatcher(const FST &fst,
    709              MatchType match_type,
    710              Label phi_label = kNoLabel,
    711              bool phi_loop = true,
    712              MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO,
    713              M *matcher = 0)
    714       : matcher_(matcher ? matcher : new M(fst, match_type)),
    715         match_type_(match_type),
    716         phi_label_(phi_label),
    717         state_(kNoStateId),
    718         phi_loop_(phi_loop),
    719         error_(false) {
    720     if (match_type == MATCH_BOTH) {
    721       FSTERROR() << "PhiMatcher: bad match type";
    722       match_type_ = MATCH_NONE;
    723       error_ = true;
    724     }
    725     if (phi_label == 0) {
    726       FSTERROR() << "PhiMatcher: 0 cannot be used as phi_label";
    727       phi_label_ = kNoLabel;
    728       error_ = true;
    729     }
    730 
    731     if (rewrite_mode == MATCHER_REWRITE_AUTO)
    732       rewrite_both_ = fst.Properties(kAcceptor, true);
    733     else if (rewrite_mode == MATCHER_REWRITE_ALWAYS)
    734       rewrite_both_ = true;
    735     else
    736       rewrite_both_ = false;
    737    }
    738 
    739   PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
    740       : matcher_(new M(*matcher.matcher_, safe)),
    741         match_type_(matcher.match_type_),
    742         phi_label_(matcher.phi_label_),
    743         rewrite_both_(matcher.rewrite_both_),
    744         state_(kNoStateId),
    745         phi_loop_(matcher.phi_loop_),
    746         error_(matcher.error_) {}
    747 
    748   virtual ~PhiMatcher() {
    749     delete matcher_;
    750   }
    751 
    752   virtual PhiMatcher<M> *Copy(bool safe = false) const {
    753     return new PhiMatcher<M>(*this, safe);
    754   }
    755 
    756   virtual MatchType Type(bool test) const { return matcher_->Type(test); }
    757 
    758   void SetState(StateId s) {
    759     matcher_->SetState(s);
    760     state_ = s;
    761     has_phi_ = phi_label_ != kNoLabel;
    762   }
    763 
    764   bool Find(Label match_label);
    765 
    766   bool Done() const { return matcher_->Done(); }
    767 
    768   const Arc& Value() const {
    769     if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
    770       return matcher_->Value();
    771     } else {
    772       phi_arc_ = matcher_->Value();
    773       phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
    774       if (phi_match_ != kNoLabel) {
    775         if (rewrite_both_) {
    776           if (phi_arc_.ilabel == phi_label_)
    777             phi_arc_.ilabel = phi_match_;
    778           if (phi_arc_.olabel == phi_label_)
    779             phi_arc_.olabel = phi_match_;
    780         } else if (match_type_ == MATCH_INPUT) {
    781           phi_arc_.ilabel = phi_match_;
    782         } else {
    783           phi_arc_.olabel = phi_match_;
    784         }
    785       }
    786       return phi_arc_;
    787     }
    788   }
    789 
    790   void Next() { matcher_->Next(); }
    791 
    792   virtual const FST &GetFst() const { return matcher_->GetFst(); }
    793 
    794   virtual uint64 Properties(uint64 props) const;
    795 
    796 private:
    797   virtual void SetState_(StateId s) { SetState(s); }
    798   virtual bool Find_(Label label) { return Find(label); }
    799   virtual bool Done_() const { return Done(); }
    800   virtual const Arc& Value_() const { return Value(); }
    801   virtual void Next_() { Next(); }
    802 
    803   M *matcher_;
    804   MatchType match_type_;  // Type of match requested
    805   Label phi_label_;       // Label that represents the phi transition
    806   bool rewrite_both_;     // Rewrite both sides when both are 'phi_label_'
    807   bool has_phi_;          // Are there possibly phis at the current state?
    808   Label phi_match_;       // Current label that matches phi loop
    809   mutable Arc phi_arc_;   // Arc to return
    810   StateId state_;         // State where looking for matches
    811   Weight phi_weight_;     // Product of the weights of phi transitions taken
    812   bool phi_loop_;         // When true, phi self-loop are allowed and treated
    813                           // as rho (required for Aho-Corasick)
    814   bool error_;             // Error encountered
    815 
    816   void operator=(const PhiMatcher<M> &);  // disallow
    817 };
    818 
    819 template <class M> inline
    820 bool PhiMatcher<M>::Find(Label match_label) {
    821   if (match_label == phi_label_ && phi_label_ != kNoLabel) {
    822     FSTERROR() << "PhiMatcher::Find: bad label (phi)";
    823     error_ = true;
    824     return false;
    825   }
    826   matcher_->SetState(state_);
    827   phi_match_ = kNoLabel;
    828   phi_weight_ = Weight::One();
    829   if (!has_phi_ || match_label == 0 || match_label == kNoLabel)
    830     return matcher_->Find(match_label);
    831   StateId state = state_;
    832   while (!matcher_->Find(match_label)) {
    833     if (!matcher_->Find(phi_label_))
    834       return false;
    835     if (phi_loop_ && matcher_->Value().nextstate == state) {
    836       phi_match_ = match_label;
    837       return true;
    838     }
    839     phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
    840     state = matcher_->Value().nextstate;
    841     matcher_->Next();
    842     if (!matcher_->Done()) {
    843       FSTERROR() << "PhiMatcher: phi non-determinism not supported";
    844       error_ = true;
    845     }
    846     matcher_->SetState(state);
    847   }
    848   return true;
    849 }
    850 
    851 template <class M> inline
    852 uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
    853   uint64 outprops = matcher_->Properties(inprops);
    854   if (error_) outprops |= kError;
    855 
    856   if (match_type_ == MATCH_NONE) {
    857     return outprops;
    858   } else if (match_type_ == MATCH_INPUT) {
    859     if (rewrite_both_) {
    860       return outprops & ~(kODeterministic | kNonODeterministic | kString |
    861                        kILabelSorted | kNotILabelSorted |
    862                        kOLabelSorted | kNotOLabelSorted);
    863     } else {
    864       return outprops & ~(kODeterministic | kAcceptor | kString |
    865                        kILabelSorted | kNotILabelSorted |
    866                        kOLabelSorted | kNotOLabelSorted);
    867     }
    868   } else if (match_type_ == MATCH_OUTPUT) {
    869     if (rewrite_both_) {
    870       return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
    871                        kILabelSorted | kNotILabelSorted |
    872                        kOLabelSorted | kNotOLabelSorted);
    873     } else {
    874       return outprops & ~(kIDeterministic | kAcceptor | kString |
    875                        kILabelSorted | kNotILabelSorted |
    876                        kOLabelSorted | kNotOLabelSorted);
    877     }
    878   } else {
    879     // Shouldn't ever get here.
    880     FSTERROR() << "PhiMatcher:: bad match type: " << match_type_;
    881     return 0;
    882   }
    883 }
    884 
    885 
    886 //
    887 // MULTI-EPS MATCHER FLAGS
    888 //
    889 
    890 // Return multi-epsilon arcs for Find(kNoLabel).
    891 const uint32 kMultiEpsList =  0x00000001;
    892 
    893 // Return a kNolabel loop for Find(multi_eps).
    894 const uint32 kMultiEpsLoop =  0x00000002;
    895 
    896 // MultiEpsMatcher: allows treating multiple non-0 labels as
    897 // non-consuming labels in addition to 0 that is always
    898 // non-consuming. Precise behavior controlled by 'flags' argument. By
    899 // default, the underlying matcher is constructed by
    900 // MultiEpsMatcher. The user can instead pass in this object; in that
    901 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
    902 // true.
    903 template <class M>
    904 class MultiEpsMatcher {
    905  public:
    906   typedef typename M::FST FST;
    907   typedef typename M::Arc Arc;
    908   typedef typename Arc::StateId StateId;
    909   typedef typename Arc::Label Label;
    910   typedef typename Arc::Weight Weight;
    911 
    912   MultiEpsMatcher(const FST &fst, MatchType match_type,
    913                   uint32 flags = (kMultiEpsLoop | kMultiEpsList),
    914                   M *matcher = 0, bool own_matcher = true)
    915       : matcher_(matcher ? matcher : new M(fst, match_type)),
    916         flags_(flags),
    917         own_matcher_(matcher ?  own_matcher : true) {
    918     if (match_type == MATCH_INPUT) {
    919       loop_.ilabel = kNoLabel;
    920       loop_.olabel = 0;
    921     } else {
    922       loop_.ilabel = 0;
    923       loop_.olabel = kNoLabel;
    924     }
    925     loop_.weight = Weight::One();
    926     loop_.nextstate = kNoStateId;
    927   }
    928 
    929   MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
    930       : matcher_(new M(*matcher.matcher_, safe)),
    931         flags_(matcher.flags_),
    932         own_matcher_(true),
    933         multi_eps_labels_(matcher.multi_eps_labels_),
    934         loop_(matcher.loop_) {
    935     loop_.nextstate = kNoStateId;
    936   }
    937 
    938   ~MultiEpsMatcher() {
    939     if (own_matcher_)
    940       delete matcher_;
    941   }
    942 
    943   MultiEpsMatcher<M> *Copy(bool safe = false) const {
    944     return new MultiEpsMatcher<M>(*this, safe);
    945   }
    946 
    947   MatchType Type(bool test) const { return matcher_->Type(test); }
    948 
    949   void SetState(StateId s) {
    950     matcher_->SetState(s);
    951     loop_.nextstate = s;
    952   }
    953 
    954   bool Find(Label match_label);
    955 
    956   bool Done() const {
    957     return done_;
    958   }
    959 
    960   const Arc& Value() const {
    961     return current_loop_ ? loop_ : matcher_->Value();
    962   }
    963 
    964   void Next() {
    965     if (!current_loop_) {
    966       matcher_->Next();
    967       done_ = matcher_->Done();
    968       if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
    969         ++multi_eps_iter_;
    970         while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
    971                !matcher_->Find(*multi_eps_iter_))
    972           ++multi_eps_iter_;
    973         if (multi_eps_iter_ != multi_eps_labels_.End())
    974           done_ = false;
    975         else
    976           done_ = !matcher_->Find(kNoLabel);
    977 
    978       }
    979     } else {
    980       done_ = true;
    981     }
    982   }
    983 
    984   const FST &GetFst() const { return matcher_->GetFst(); }
    985 
    986   uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
    987 
    988   uint32 Flags() const { return matcher_->Flags(); }
    989 
    990   void AddMultiEpsLabel(Label label) {
    991     if (label == 0) {
    992       FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
    993     } else {
    994       multi_eps_labels_.Insert(label);
    995     }
    996   }
    997 
    998   void ClearMultiEpsLabels() {
    999     multi_eps_labels_.Clear();
   1000   }
   1001 
   1002 private:
   1003   // Specialized for 'set' - log lookup
   1004   bool IsMultiEps(const set<Label> &multi_eps_labels, Label label) const {
   1005     return multi_eps_labels.Find(label) != multi_eps_labels.end();
   1006   }
   1007 
   1008   M *matcher_;
   1009   uint32 flags_;
   1010   bool own_matcher_;             // Does this class delete the matcher?
   1011 
   1012   // Multi-eps label set
   1013   CompactSet<Label, kNoLabel> multi_eps_labels_;
   1014   typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
   1015 
   1016   bool current_loop_;            // Current arc is the implicit loop
   1017   mutable Arc loop_;             // For non-consuming symbols
   1018   bool done_;                    // Matching done
   1019 
   1020   void operator=(const MultiEpsMatcher<M> &);  // Disallow
   1021 };
   1022 
   1023 template <class M> inline
   1024 bool MultiEpsMatcher<M>::Find(Label match_label) {
   1025   multi_eps_iter_ = multi_eps_labels_.End();
   1026   current_loop_ = false;
   1027   bool ret;
   1028   if (match_label == 0) {
   1029     ret = matcher_->Find(0);
   1030   } else if (match_label == kNoLabel) {
   1031     if (flags_ & kMultiEpsList) {
   1032       // return all non-consuming arcs (incl. epsilon)
   1033       multi_eps_iter_ = multi_eps_labels_.Begin();
   1034       while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
   1035              !matcher_->Find(*multi_eps_iter_))
   1036         ++multi_eps_iter_;
   1037       if (multi_eps_iter_ != multi_eps_labels_.End())
   1038         ret = true;
   1039       else
   1040         ret = matcher_->Find(kNoLabel);
   1041     } else {
   1042       // return all epsilon arcs
   1043       ret = matcher_->Find(kNoLabel);
   1044     }
   1045   } else if ((flags_ & kMultiEpsLoop) &&
   1046              multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) {
   1047     // return 'implicit' loop
   1048     current_loop_ = true;
   1049     ret = true;
   1050   } else {
   1051     ret = matcher_->Find(match_label);
   1052   }
   1053   done_ = !ret;
   1054   return ret;
   1055 }
   1056 
   1057 
   1058 // Generic matcher, templated on the FST definition
   1059 // - a wrapper around pointer to specific one.
   1060 // Here is a typical use: \code
   1061 //   Matcher<StdFst> matcher(fst, MATCH_INPUT);
   1062 //   matcher.SetState(state);
   1063 //   if (matcher.Find(label))
   1064 //     for (; !matcher.Done(); matcher.Next()) {
   1065 //       StdArc &arc = matcher.Value();
   1066 //       ...
   1067 //     } \endcode
   1068 template <class F>
   1069 class Matcher {
   1070  public:
   1071   typedef F FST;
   1072   typedef typename F::Arc Arc;
   1073   typedef typename Arc::StateId StateId;
   1074   typedef typename Arc::Label Label;
   1075   typedef typename Arc::Weight Weight;
   1076 
   1077   Matcher(const F &fst, MatchType match_type) {
   1078     base_ = fst.InitMatcher(match_type);
   1079     if (!base_)
   1080       base_ = new SortedMatcher<F>(fst, match_type);
   1081   }
   1082 
   1083   Matcher(const Matcher<F> &matcher, bool safe = false) {
   1084     base_ = matcher.base_->Copy(safe);
   1085   }
   1086 
   1087   // Takes ownership of the provided matcher
   1088   Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; }
   1089 
   1090   ~Matcher() { delete base_; }
   1091 
   1092   Matcher<F> *Copy(bool safe = false) const {
   1093     return new Matcher<F>(*this, safe);
   1094   }
   1095 
   1096   MatchType Type(bool test) const { return base_->Type(test); }
   1097   void SetState(StateId s) { base_->SetState(s); }
   1098   bool Find(Label label) { return base_->Find(label); }
   1099   bool Done() const { return base_->Done(); }
   1100   const Arc& Value() const { return base_->Value(); }
   1101   void Next() { base_->Next(); }
   1102   const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); }
   1103   uint64 Properties(uint64 props) const { return base_->Properties(props); }
   1104   uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
   1105 
   1106  private:
   1107   MatcherBase<Arc> *base_;
   1108 
   1109   void operator=(const Matcher<Arc> &);  // disallow
   1110 };
   1111 
   1112 }  // namespace fst
   1113 
   1114 
   1115 
   1116 #endif  // FST_LIB_MATCHER_H__
   1117