Home | History | Annotate | Download | only in lib
      1 // string-weight.h
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 //
     16 // \file
     17 // String weight set and associated semiring operation definitions.
     18 
     19 #ifndef FST_LIB_STRING_WEIGHT_H__
     20 #define FST_LIB_STRING_WEIGHT_H__
     21 
     22 #include <list>
     23 
     24 #include "fst/lib/product-weight.h"
     25 #include "fst/lib/weight.h"
     26 
     27 namespace fst {
     28 
     29 const int kStringInfinity = -1;      // Label for the infinite string
     30 const int kStringBad = -2;           // Label for a non-string
     31 const char kStringSeparator = '_';   // Label separator in strings
     32 
     33 // Determines whether to use left or right string semiring.  Includes
     34 // restricted versions that signal an error if proper prefixes
     35 // (suffixes) would otherwise be returned by Plus, useful with various
     36 // algorithms that require functional transducer input with the
     37 // string semirings.
     38 enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
     39                   STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
     40 
     41 #define REVERSE_STRING_TYPE(S)                                  \
     42    ((S) == STRING_LEFT ? STRING_RIGHT :                         \
     43     ((S) == STRING_RIGHT ? STRING_LEFT :                        \
     44      ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT :     \
     45       STRING_LEFT_RESTRICT)))
     46 
     47 template <typename L, StringType S = STRING_LEFT>
     48 class StringWeight;
     49 
     50 template <typename L, StringType S = STRING_LEFT>
     51 class StringWeightIterator;
     52 
     53 template <typename L, StringType S = STRING_LEFT>
     54 class StringWeightReverseIterator;
     55 
     56 template <typename L, StringType S>
     57 bool operator==(const StringWeight<L, S> &,  const StringWeight<L, S> &);
     58 
     59 
     60 // String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
     61 template <typename L, StringType S>
     62 class StringWeight {
     63  public:
     64   typedef L Label;
     65   typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
     66 
     67   friend class StringWeightIterator<L, S>;
     68   friend class StringWeightReverseIterator<L, S>;
     69   friend bool operator==<>(const StringWeight<L, S> &,
     70                            const StringWeight<L, S> &);
     71 
     72   StringWeight() { Init(); }
     73 
     74   template <typename Iter>
     75   StringWeight(const Iter &begin, const Iter &end) {
     76     Init();
     77     for (Iter iter = begin; iter != end; ++iter)
     78       PushBack(*iter);
     79   }
     80 
     81   explicit StringWeight(L l) { Init(); PushBack(l); }
     82 
     83   static const StringWeight<L, S> &Zero() {
     84     static const StringWeight<L, S> zero(kStringInfinity);
     85     return zero;
     86   }
     87 
     88   static const StringWeight<L, S> &One() {
     89     static const StringWeight<L, S> one;
     90     return one;
     91   }
     92 
     93   static const string &Type() {
     94     static const string type =
     95         S == STRING_LEFT ? "string" :
     96         (S == STRING_RIGHT ? "right_string" :
     97          (S == STRING_LEFT_RESTRICT ? "restricted_string" :
     98           "right_restricted_string"));
     99     return type;
    100   }
    101 
    102   bool Member() const;
    103 
    104   istream &Read(istream &strm);
    105 
    106   ostream &Write(ostream &strm) const;
    107 
    108   ssize_t Hash() const;
    109 
    110   StringWeight<L, S> Quantize(float delta = kDelta) const {
    111     return *this;
    112   }
    113 
    114   ReverseWeight Reverse() const;
    115 
    116   static uint64 Properties() {
    117     return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
    118             kLeftSemiring : kRightSemiring) | kIdempotent;
    119   }
    120 
    121   // NB: This needs to be uncommented only if default fails for this impl.
    122   // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
    123 
    124   // These operations combined with the StringWeightIterator and
    125   // StringWeightReverseIterator provide the access and mutation of
    126   // the string internal elements.
    127 
    128   // Common initializer among constructors.
    129   void Init() { first_ = 0; }
    130 
    131   // Clear existing StringWeight.
    132   void Clear() { first_ = 0; rest_.clear(); }
    133 
    134   Label Size() const { return first_ ? rest_.size() + 1 : 0; }
    135 
    136   void PushFront(L l) {
    137     if (first_)
    138       rest_.push_front(first_);
    139     first_ = l;
    140   }
    141 
    142   void PushBack(L l) {
    143     if (!first_)
    144       first_ = l;
    145     else
    146       rest_.push_back(l);
    147   }
    148 
    149  private:
    150   L first_;         // first label in string (0 if empty)
    151   list<L> rest_;    // remaining labels in string
    152 };
    153 
    154 
    155 // Traverses string in forward direction.
    156 template <typename L, StringType S>
    157 class StringWeightIterator {
    158  public:
    159   explicit StringWeightIterator(const StringWeight<L, S>& w)
    160       : first_(w.first_), rest_(w.rest_), init_(true),
    161         iter_(rest_.begin()) {}
    162 
    163   bool Done() const {
    164     if (init_) return first_ == 0;
    165     else return iter_ == rest_.end();
    166   }
    167 
    168   const L& Value() const { return init_ ? first_ : *iter_; }
    169 
    170   void Next() {
    171     if (init_) init_ = false;
    172     else  ++iter_;
    173   }
    174 
    175   void Reset() {
    176     init_ = true;
    177     iter_ = rest_.begin();
    178   }
    179 
    180  private:
    181   const L &first_;
    182   const list<L> &rest_;
    183   bool init_;   // in the initialized state?
    184   typename list<L>::const_iterator iter_;
    185 
    186   DISALLOW_EVIL_CONSTRUCTORS(StringWeightIterator);
    187 };
    188 
    189 
    190 // Traverses string in forward direction.
    191 template <typename L, StringType S>
    192 class StringWeightReverseIterator {
    193  public:
    194   explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
    195       : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
    196         iter_(rest_.rbegin()) {}
    197 
    198   bool Done() const { return fin_; }
    199 
    200   const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
    201 
    202   void Next() {
    203     if (iter_ == rest_.rend()) fin_ = true;
    204     else  ++iter_;
    205   }
    206 
    207   void Reset() {
    208     fin_ = false;
    209     iter_ = rest_.rbegin();
    210   }
    211 
    212  private:
    213   const L &first_;
    214   const list<L> &rest_;
    215   bool fin_;   // in the final state?
    216   typename list<L>::const_reverse_iterator iter_;
    217 
    218   DISALLOW_EVIL_CONSTRUCTORS(StringWeightReverseIterator);
    219 };
    220 
    221 
    222 // StringWeight member functions follow that require
    223 // StringWeightIterator or StringWeightReverseIterator.
    224 
    225 template <typename L, StringType S>
    226 inline istream &StringWeight<L, S>::Read(istream &strm) {
    227   Clear();
    228   int32 size = 0;
    229   ReadType(strm, &size);
    230   for (int i = 0; i < size; ++i) {
    231     L label;
    232     ReadType(strm, &label);
    233     PushBack(label);
    234   }
    235   return strm;
    236 }
    237 
    238 template <typename L, StringType S>
    239 inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
    240   int32 size =  Size();
    241   WriteType(strm, size);
    242   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) {
    243     L label = iter.Value();
    244     WriteType(strm, label);
    245   }
    246   return strm;
    247 }
    248 
    249 template <typename L, StringType S>
    250 inline bool StringWeight<L, S>::Member() const {
    251   if (Size() != 1)
    252     return true;
    253   StringWeightIterator<L, S> iter(*this);
    254   return iter.Value() != kStringBad;
    255 }
    256 
    257 template <typename L, StringType S>
    258 inline typename StringWeight<L, S>::ReverseWeight
    259 StringWeight<L, S>::Reverse() const {
    260   ReverseWeight rw;
    261   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
    262     rw.PushFront(iter.Value());
    263   return rw;
    264 }
    265 
    266 template <typename L, StringType S>
    267 inline ssize_t StringWeight<L, S>::Hash() const {
    268   size_t h = 0;
    269   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
    270     h ^= h<<1 ^ iter.Value();
    271   return static_cast<ssize_t>(h);
    272 }
    273 
    274 // NB: This needs to be uncommented only if default fails for this the impl.
    275 //
    276 // template <typename L, StringType S>
    277 // inline StringWeight<L, S>
    278 // &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
    279 //   if (this != &w) {
    280 //     Clear();
    281 //     for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
    282 //       PushBack(iter.Value());
    283 //   }
    284 //   return *this;
    285 // }
    286 
    287 template <typename L, StringType S>
    288 inline bool operator==(const StringWeight<L, S> &w1,
    289                        const StringWeight<L, S> &w2) {
    290   if (w1.Size() != w2.Size())
    291     return false;
    292 
    293   StringWeightIterator<L, S> iter1(w1);
    294   StringWeightIterator<L, S> iter2(w2);
    295 
    296   for (; !iter1.Done() ; iter1.Next(), iter2.Next())
    297     if (iter1.Value() != iter2.Value())
    298       return false;
    299 
    300   return true;
    301 }
    302 
    303 template <typename L, StringType S>
    304 inline bool operator!=(const StringWeight<L, S> &w1,
    305                        const StringWeight<L, S> &w2) {
    306   return !(w1 == w2);
    307 }
    308 
    309 template <typename L, StringType S>
    310 inline bool ApproxEqual(const StringWeight<L, S> &w1,
    311                         const StringWeight<L, S> &w2,
    312                         float delta = kDelta) {
    313   return w1 == w2;
    314 }
    315 
    316 template <typename L, StringType S>
    317 inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
    318   StringWeightIterator<L, S> iter(w);
    319   if (iter.Done())
    320     return strm << "Epsilon";
    321   else if (iter.Value() == kStringInfinity)
    322     return strm << "Infinity";
    323   else if (iter.Value() == kStringBad)
    324     return strm << "BadString";
    325   else
    326     for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
    327       if (i > 0)
    328         strm << kStringSeparator;
    329       strm << iter.Value();
    330     }
    331   return strm;
    332 }
    333 
    334 template <typename L, StringType S>
    335 inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
    336   string s;
    337   strm >> s;
    338   if (s == "Infinity") {
    339     w = StringWeight<L, S>::Zero();
    340   } else if (s == "Epsilon") {
    341     w = StringWeight<L, S>::One();
    342   } else {
    343     w.Clear();
    344     char *p = 0;
    345     for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
    346       int l = strtoll(cs, &p, 10);
    347       if (p == cs || *p != 0 && *p != kStringSeparator) {
    348         strm.clear(std::ios::badbit);
    349         break;
    350       }
    351       w.PushBack(l);
    352     }
    353   }
    354   return strm;
    355 }
    356 
    357 
    358 // Default is for the restricted left and right semirings.  String
    359 // equality is required (for non-Zero() input. This restriction
    360 // is used in e.g. Determinize to ensure functional input.
    361 template <typename L, StringType S>  inline StringWeight<L, S>
    362 Plus(const StringWeight<L, S> &w1,
    363      const StringWeight<L, S> &w2) {
    364   if (w1 == StringWeight<L, S>::Zero())
    365     return w2;
    366   if (w2 == StringWeight<L, S>::Zero())
    367     return w1;
    368 
    369   if (w1 != w2)
    370     LOG(FATAL) << "StringWeight::Plus: unequal arguments "
    371                << "(non-functional FST?)";
    372 
    373   return w1;
    374 }
    375 
    376 
    377 // Longest common prefix for left string semiring.
    378 template <typename L>  inline StringWeight<L, STRING_LEFT>
    379 Plus(const StringWeight<L, STRING_LEFT> &w1,
    380      const StringWeight<L, STRING_LEFT> &w2) {
    381   if (w1 == StringWeight<L, STRING_LEFT>::Zero())
    382     return w2;
    383   if (w2 == StringWeight<L, STRING_LEFT>::Zero())
    384     return w1;
    385 
    386   StringWeight<L, STRING_LEFT> sum;
    387   StringWeightIterator<L, STRING_LEFT> iter1(w1);
    388   StringWeightIterator<L, STRING_LEFT> iter2(w2);
    389   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
    390        iter1.Next(), iter2.Next())
    391     sum.PushBack(iter1.Value());
    392   return sum;
    393 }
    394 
    395 
    396 // Longest common suffix for right string semiring.
    397 template <typename L>  inline StringWeight<L, STRING_RIGHT>
    398 Plus(const StringWeight<L, STRING_RIGHT> &w1,
    399      const StringWeight<L, STRING_RIGHT> &w2) {
    400   if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
    401     return w2;
    402   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
    403     return w1;
    404 
    405   StringWeight<L, STRING_RIGHT> sum;
    406   StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1);
    407   StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2);
    408   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
    409        iter1.Next(), iter2.Next())
    410     sum.PushFront(iter1.Value());
    411   return sum;
    412 }
    413 
    414 
    415 template <typename L, StringType S>
    416 inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
    417                              const StringWeight<L, S> &w2) {
    418   if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero())
    419     return StringWeight<L, S>::Zero();
    420 
    421   StringWeight<L, S> prod(w1);
    422   for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next())
    423     prod.PushBack(iter.Value());
    424 
    425   return prod;
    426 }
    427 
    428 
    429 // Default is for left division in the left string and the
    430 // left restricted string semirings.
    431 template <typename L, StringType S> inline StringWeight<L, S>
    432 Divide(const StringWeight<L, S> &w1,
    433        const StringWeight<L, S> &w2,
    434        DivideType typ) {
    435 
    436   if (typ != DIVIDE_LEFT)
    437     LOG(FATAL) << "StringWeight::Divide: only left division is defined "
    438                << "for the " << StringWeight<L, S>::Type() << " semiring";
    439 
    440   if (w2 == StringWeight<L, S>::Zero())
    441     return StringWeight<L, S>(kStringBad);
    442   else if (w1 == StringWeight<L, S>::Zero())
    443     return StringWeight<L, S>::Zero();
    444 
    445   StringWeight<L, S> div;
    446   StringWeightIterator<L, S> iter(w1);
    447   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    448     if (i >= w2.Size())
    449       div.PushBack(iter.Value());
    450   }
    451   return div;
    452 }
    453 
    454 
    455 // Right division in the right string semiring.
    456 template <typename L> inline StringWeight<L, STRING_RIGHT>
    457 Divide(const StringWeight<L, STRING_RIGHT> &w1,
    458        const StringWeight<L, STRING_RIGHT> &w2,
    459        DivideType typ) {
    460 
    461   if (typ != DIVIDE_RIGHT)
    462     LOG(FATAL) << "StringWeight::Divide: only right division is defined "
    463                << "for the right string semiring";
    464 
    465   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
    466     return StringWeight<L, STRING_RIGHT>(kStringBad);
    467   else if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
    468     return StringWeight<L, STRING_RIGHT>::Zero();
    469 
    470   StringWeight<L, STRING_RIGHT> div;
    471   StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
    472   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    473     if (i >= w2.Size())
    474       div.PushFront(iter.Value());
    475   }
    476   return div;
    477 }
    478 
    479 
    480 // Right division in the right restricted string semiring.
    481 template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
    482 Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
    483        const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
    484        DivideType typ) {
    485 
    486   if (typ != DIVIDE_RIGHT)
    487     LOG(FATAL) << "StringWeight::Divide: only right division is defined "
    488                << "for the right restricted string semiring";
    489 
    490   if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
    491     return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad);
    492   else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
    493     return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero();
    494 
    495   StringWeight<L, STRING_RIGHT_RESTRICT> div;
    496   StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
    497   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    498     if (i >= w2.Size())
    499       div.PushFront(iter.Value());
    500   }
    501   return div;
    502 }
    503 
    504 
    505 // Product of string weight and an arbitray weight.
    506 template <class L, class W, StringType S = STRING_LEFT>
    507 struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
    508   typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
    509   ReverseWeight;
    510 
    511   GallicWeight() {}
    512 
    513   GallicWeight(StringWeight<L, S> w1, W w2)
    514       : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
    515 
    516   explicit GallicWeight(const string &s, int *nread = 0)
    517       : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
    518 
    519   GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
    520       : ProductWeight<StringWeight<L, S>, W>(w) {}
    521 };
    522 
    523 }  // namespace fst;
    524 
    525 #endif  // FST_LIB_STRING_WEIGHT_H__
    526