Home | History | Annotate | Download | only in fst
      1 // weight.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // General weight set and associated semiring operation definitions.
     20 //
     21 // A semiring is specified by two binary operations Plus and Times and
     22 // two designated elements Zero and One with the following properties:
     23 //   Plus: associative, commutative, and has Zero as its identity.
     24 //   Times: associative and has identity One, distributes w.r.t. Plus, and
     25 //     has Zero as an annihilator:
     26 //          Times(Zero(), a) == Times(a, Zero()) = Zero().
     27 //
     28 //  A left semiring distributes on the left; a right semiring is
     29 //  similarly defined.
     30 //
     31 // A Weight class must have binary functions =Plus= and =Times= and
     32 // static member functions =Zero()= and =One()= and these must form
     33 // (at least) a left or right semiring.
     34 //
     35 // In addition, the following should be defined for a Weight:
     36 //   Member: predicate on set membership.
     37 //   NoWeight: static member function that returns an element that is
     38 //      not a set member; used to signal an error.
     39 //   >>: reads textual representation of a weight.
     40 //   <<: prints textual representation of a weight.
     41 //   Read(istream &strm): reads binary representation of a weight.
     42 //   Write(ostream &strm): writes binary representation of a weight.
     43 //   Hash: maps weight to size_t.
     44 //   ApproxEqual: approximate equality (for inexact weights)
     45 //   Quantize: quantizes wrt delta (for inexact weights)
     46 //   Divide: for all a,b,c s.t. Times(a, b) == c
     47 //     --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
     48 //      and Times(a, b') == c
     49 //     --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
     50 //      and Times(a', b) == c
     51 //     --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) =
     52 //      Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a
     53 //      commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c
     54 //   ReverseWeight: the type of the corresponding reverse weight.
     55 //     Typically the same type as Weight for a (both left and right) semiring.
     56 //     For the left string semiring, it is the right string semiring.
     57 //   Reverse: a mapping from Weight to ReverseWeight s.t.
     58 //     --> Reverse(Reverse(a)) = a
     59 //     --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
     60 //     --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
     61 //     Typically the identity mapping in a (both left and right) semiring.
     62 //     In the left string semiring, it maps to the reverse string
     63 //     in the right string semiring.
     64 //   Properties: specifies additional properties that hold:
     65 //      LeftSemiring: indicates weights form a left semiring.
     66 //      RightSemiring: indicates weights form a right semiring.
     67 //      Commutative: for all a,b: Times(a,b) == Times(b,a)
     68 //      Idempotent: for all a: Plus(a, a) == a.
     69 //      Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
     70 
     71 
     72 #ifndef FST_LIB_WEIGHT_H__
     73 #define FST_LIB_WEIGHT_H__
     74 
     75 #include <cmath>
     76 #include <cctype>
     77 #include <iostream>
     78 #include <sstream>
     79 
     80 #include <fst/compat.h>
     81 
     82 #include <fst/util.h>
     83 
     84 
     85 namespace fst {
     86 
     87 //
     88 // CONSTANT DEFINITIONS
     89 //
     90 
     91 // A representable float near .001
     92 const float kDelta =                   1.0F/1024.0F;
     93 
     94 // For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b))
     95 const uint64 kLeftSemiring =           0x0000000000000001ULL;
     96 
     97 // For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c))
     98 const uint64 kRightSemiring =          0x0000000000000002ULL;
     99 
    100 const uint64 kSemiring = kLeftSemiring | kRightSemiring;
    101 
    102 // For all a,b: Times(a,b) = Times(b,a)
    103 const uint64 kCommutative =       0x0000000000000004ULL;
    104 
    105 // For all a: Plus(a, a) = a
    106 const uint64 kIdempotent =             0x0000000000000008ULL;
    107 
    108 // For all a,b: Plus(a,b) = a or Plus(a,b) = b
    109 const uint64 kPath =                   0x0000000000000010ULL;
    110 
    111 
    112 // Determines direction of division.
    113 enum DivideType { DIVIDE_LEFT,   // left division
    114                   DIVIDE_RIGHT,  // right division
    115                   DIVIDE_ANY };  // division in a commutative semiring
    116 
    117 // NATURAL ORDER
    118 //
    119 // By definition:
    120 //                 a <= b iff a + b = a
    121 // The natural order is a negative partial order iff the semiring is
    122 // idempotent. It is trivially monotonic for plus. It is left
    123 // (resp. right) monotonic for times iff the semiring is left
    124 // (resp. right) distributive. It is a total order iff the semiring
    125 // has the path property. See Mohri, "Semiring Framework and
    126 // Algorithms for Shortest-Distance Problems", Journal of Automata,
    127 // Languages and Combinatorics 7(3):321-350, 2002. We define the
    128 // strict version of this order below.
    129 
    130 template <class W>
    131 class NaturalLess {
    132  public:
    133   typedef W Weight;
    134 
    135   NaturalLess() {
    136     if (!(W::Properties() & kIdempotent)) {
    137       FSTERROR() << "NaturalLess: Weight type is not idempotent: "
    138                  << W::Type();
    139     }
    140   }
    141 
    142   bool operator()(const W &w1, const W &w2) const {
    143     return (Plus(w1, w2) == w1) && w1 != w2;
    144   }
    145 };
    146 
    147 
    148 // Power is the iterated product for arbitrary semirings such that
    149 // Power(w, 0) is One() for the semiring, and
    150 // Power(w, n) = Times(Power(w, n-1), w)
    151 
    152 template <class W>
    153 W Power(W w, size_t n) {
    154   W result = W::One();
    155   for (size_t i = 0; i < n; ++i) {
    156     result = Times(result, w);
    157   }
    158   return result;
    159 }
    160 
    161 // General weight converter - raises error.
    162 template <class W1, class W2>
    163 struct WeightConvert {
    164   W2 operator()(W1 w1) const {
    165     FSTERROR() << "WeightConvert: can't convert weight from \""
    166                << W1::Type() << "\" to \"" << W2::Type();
    167     return W2::NoWeight();
    168   }
    169 };
    170 
    171 // Specialized weight converter to self.
    172 template <class W>
    173 struct WeightConvert<W, W> {
    174   W operator()(W w) const { return w; }
    175 };
    176 
    177 }  // namespace fst
    178 
    179 #endif  // FST_LIB_WEIGHT_H__
    180