Home | History | Annotate | Download | only in fst
      1 // properties.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: Michael Riley <riley (at) google.com>
     17 // \file
     18 // FST property bits.
     19 
     20 #ifndef FST_LIB_PROPERTIES_H__
     21 #define FST_LIB_PROPERTIES_H__
     22 
     23 #include <sys/types.h>
     24 #include <vector>
     25 using std::vector;
     26 
     27 #include <fst/compat.h>
     28 
     29 namespace fst {
     30 
     31 // The property bits here assert facts about an FST. If individual
     32 // bits are added, then the composite properties below, the property
     33 // functions and property names in properties.cc, and
     34 // TestProperties() in test-properties.h should be updated.
     35 
     36 //
     37 // BINARY PROPERTIES
     38 //
     39 // For each property below, there is a single bit. If it is set,
     40 // the property is true. If it is not set, the property is false.
     41 //
     42 
     43 // The Fst is an ExpandedFst
     44 const uint64 kExpanded =          0x0000000000000001ULL;
     45 
     46 // The Fst is a MutableFst
     47 const uint64 kMutable =           0x0000000000000002ULL;
     48 
     49 // An error was detected while constructing/using the FST
     50 const uint64 kError =             0x0000000000000004ULL;
     51 
     52 //
     53 // TRINARY PROPERTIES
     54 //
     55 // For each of these properties below there is a pair of property bits
     56 // - one positive and one negative. If the positive bit is set, the
     57 // property is true. If the negative bit is set, the property is
     58 // false. If neither is set, the property has unknown value. Both
     59 // should never be simultaneously set. The individual positive and
     60 // negative bit pairs should be adjacent with the positive bit
     61 // at an odd and lower position.
     62 
     63 // ilabel == olabel for each arc
     64 const uint64 kAcceptor =          0x0000000000010000ULL;
     65 // ilabel != olabel for some arc
     66 const uint64 kNotAcceptor =       0x0000000000020000ULL;
     67 
     68 // ilabels unique leaving each state
     69 const uint64 kIDeterministic =    0x0000000000040000ULL;
     70 // ilabels not unique leaving some state
     71 const uint64 kNonIDeterministic = 0x0000000000080000ULL;
     72 
     73 // olabels unique leaving each state
     74 const uint64 kODeterministic =    0x0000000000100000ULL;
     75 // olabels not unique leaving some state
     76 const uint64 kNonODeterministic = 0x0000000000200000ULL;
     77 
     78 // FST has input/output epsilons
     79 const uint64 kEpsilons =          0x0000000000400000ULL;
     80 // FST has no input/output epsilons
     81 const uint64 kNoEpsilons =        0x0000000000800000ULL;
     82 
     83 // FST has input epsilons
     84 const uint64 kIEpsilons =         0x0000000001000000ULL;
     85 // FST has no input epsilons
     86 const uint64 kNoIEpsilons =       0x0000000002000000ULL;
     87 
     88 // FST has output epsilons
     89 const uint64 kOEpsilons =         0x0000000004000000ULL;
     90 // FST has no output epsilons
     91 const uint64 kNoOEpsilons =       0x0000000008000000ULL;
     92 
     93 // ilabels sorted wrt < for each state
     94 const uint64 kILabelSorted =      0x0000000010000000ULL;
     95 // ilabels not sorted wrt < for some state
     96 const uint64 kNotILabelSorted =   0x0000000020000000ULL;
     97 
     98 // olabels sorted wrt < for each state
     99 const uint64 kOLabelSorted =      0x0000000040000000ULL;
    100 // olabels not sorted wrt < for some state
    101 const uint64 kNotOLabelSorted =   0x0000000080000000ULL;
    102 
    103 // Non-trivial arc or final weights
    104 const uint64 kWeighted =          0x0000000100000000ULL;
    105 // Only trivial arc and final weights
    106 const uint64 kUnweighted =        0x0000000200000000ULL;
    107 
    108 // FST has cycles
    109 const uint64 kCyclic =            0x0000000400000000ULL;
    110 // FST has no cycles
    111 const uint64 kAcyclic =           0x0000000800000000ULL;
    112 
    113 // FST has cycles containing the initial state
    114 const uint64 kInitialCyclic =     0x0000001000000000ULL;
    115 // FST has no cycles containing the initial state
    116 const uint64 kInitialAcyclic =    0x0000002000000000ULL;
    117 
    118 // FST is topologically sorted
    119 const uint64 kTopSorted =         0x0000004000000000ULL;
    120 // FST is not topologically sorted
    121 const uint64 kNotTopSorted =      0x0000008000000000ULL;
    122 
    123 // All states reachable from the initial state
    124 const uint64 kAccessible =        0x0000010000000000ULL;
    125 // Not all states reachable from the initial state
    126 const uint64 kNotAccessible =     0x0000020000000000ULL;
    127 
    128 // All states can reach a final state
    129 const uint64 kCoAccessible =      0x0000040000000000ULL;
    130 // Not all states can reach a final state
    131 const uint64 kNotCoAccessible =   0x0000080000000000ULL;
    132 
    133 // If NumStates() > 0, then state 0 is initial, state NumStates()-1 is
    134 // final, there is a transition from each non-final state i to
    135 // state i+1, and there are no other transitions.
    136 const uint64 kString =            0x0000100000000000ULL;
    137 
    138 // Not a string FST
    139 const uint64 kNotString =         0x0000200000000000ULL;
    140 
    141 //
    142 // COMPOSITE PROPERTIES
    143 //
    144 
    145 // Properties of an empty machine
    146 const uint64 kNullProperties
    147   = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
    148     kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
    149     kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
    150     kAccessible | kCoAccessible | kString;
    151 
    152 // Properties that are preserved when an FST is copied
    153 const uint64 kCopyProperties
    154   = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
    155     kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
    156     kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
    157     kILabelSorted | kNotILabelSorted | kOLabelSorted |
    158     kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
    159     kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
    160     kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
    161     kString | kNotString;
    162 
    163 // Properites that are intrinsic to the FST
    164 const uint64 kIntrinsicProperties
    165   = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
    166     kNonIDeterministic | kODeterministic | kNonODeterministic |
    167     kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
    168     kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
    169     kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
    170     kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
    171     kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
    172     kString | kNotString;
    173 
    174 // Properites that are (potentially) extrinsic to the FST
    175 const uint64 kExtrinsicProperties = kError;
    176 
    177 // Properties that are preserved when an FST start state is set
    178 const uint64 kSetStartProperties
    179   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    180     kIDeterministic | kNonIDeterministic | kODeterministic |
    181     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    182     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    183     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
    184     kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
    185     kCoAccessible | kNotCoAccessible;
    186 
    187 // Properties that are preserved when an FST final weight is set
    188 const uint64 kSetFinalProperties
    189   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    190     kIDeterministic | kNonIDeterministic | kODeterministic |
    191     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    192     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    193     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
    194     kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
    195     kNotTopSorted | kAccessible | kNotAccessible;
    196 
    197 // Properties that are preserved when an FST state is added
    198 const uint64 kAddStateProperties
    199   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    200     kIDeterministic | kNonIDeterministic | kODeterministic |
    201     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    202     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    203     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
    204     kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    205     kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
    206     kNotCoAccessible | kNotString;
    207 
    208 // Properties that are preserved when an FST arc is added
    209 const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
    210     kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
    211     kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
    212     kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;
    213 
    214 // Properties that are preserved when an FST arc is set
    215 const uint64 kSetArcProperties = kExpanded | kMutable | kError;
    216 
    217 // Properties that are preserved when FST states are deleted
    218 const uint64 kDeleteStatesProperties
    219   = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
    220     kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
    221     kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
    222     kInitialAcyclic | kTopSorted;
    223 
    224 // Properties that are preserved when FST arcs are deleted
    225 const uint64 kDeleteArcsProperties
    226   = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
    227     kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
    228     kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
    229     kInitialAcyclic | kTopSorted |  kNotAccessible | kNotCoAccessible;
    230 
    231 // Properties that are preserved when an FST's states are reordered
    232 const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
    233     kNotAcceptor | kIDeterministic | kNonIDeterministic |
    234     kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
    235     kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
    236     kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
    237     | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    238     kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
    239     kNotCoAccessible;
    240 
    241 // Properties that are preserved when an FST's arcs are reordered
    242 const uint64 kArcSortProperties =
    243   kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
    244   kNonIDeterministic | kODeterministic | kNonODeterministic |
    245   kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
    246   kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
    247   kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
    248   kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
    249   kString | kNotString;
    250 
    251 // Properties that are preserved when an FST's input labels are changed.
    252 const uint64 kILabelInvariantProperties =
    253   kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
    254   kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
    255   kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    256   kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
    257   kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
    258 
    259 // Properties that are preserved when an FST's output labels are changed.
    260 const uint64 kOLabelInvariantProperties =
    261   kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
    262   kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
    263   kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    264   kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
    265   kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
    266 
    267 // Properties that are preserved when an FST's weights are changed.
    268 // This assumes that the set of states that are non-final is not changed.
    269 const uint64 kWeightInvariantProperties =
    270   kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
    271   kNonIDeterministic | kODeterministic | kNonODeterministic |
    272   kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
    273   kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
    274   kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
    275   kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
    276   kNotCoAccessible | kString | kNotString;
    277 
    278 // Properties that are preserved when a superfinal state is added
    279 // and an FSTs final weights are directed to it via new transitions.
    280 const uint64 kAddSuperFinalProperties  = kExpanded | kMutable | kError |
    281     kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    282     kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
    283     kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    284     kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
    285     kNotCoAccessible | kNotString;
    286 
    287 // Properties that are preserved when a superfinal state is removed
    288 // and the epsilon transitions directed to it are made final weights.
    289 const uint64 kRmSuperFinalProperties  = kExpanded | kMutable | kError |
    290     kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
    291     kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
    292     kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    293     kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
    294     kNotCoAccessible | kString;
    295 
    296 // All binary properties
    297 const uint64 kBinaryProperties =  0x0000000000000007ULL;
    298 
    299 // All trinary properties
    300 const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;
    301 
    302 //
    303 // COMPUTED PROPERTIES
    304 //
    305 
    306 // 1st bit of trinary properties
    307 const uint64 kPosTrinaryProperties =
    308   kTrinaryProperties & 0x5555555555555555ULL;
    309 
    310 // 2nd bit of trinary properties
    311 const uint64 kNegTrinaryProperties =
    312   kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;
    313 
    314 // All properties
    315 const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;
    316 
    317 //
    318 // PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
    319 //
    320 
    321 // Below are functions for getting property bit vectors when executing
    322 // mutating fst operations.
    323 inline uint64 SetStartProperties(uint64 inprops);
    324 template <typename Weight>
    325 uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
    326                           Weight new_weight);
    327 inline uint64 AddStateProperties(uint64 inprops);
    328 template <typename A>
    329 uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
    330                            const A *prev_arc);
    331 inline uint64 DeleteStatesProperties(uint64 inprops);
    332 inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
    333 inline uint64 DeleteArcsProperties(uint64 inprops);
    334 
    335 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
    336 uint64 ComplementProperties(uint64 inprops);
    337 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
    338 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
    339                         bool delayed = false);
    340 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
    341 uint64 FactorWeightProperties(uint64 inprops);
    342 uint64 InvertProperties(uint64 inprops);
    343 uint64 ProjectProperties(uint64 inprops, bool project_input);
    344 uint64 RandGenProperties(uint64 inprops, bool weighted);
    345 uint64 RelabelProperties(uint64 inprops);
    346 uint64 ReplaceProperties(const vector<uint64>& inprops,
    347                          ssize_t root,
    348                          bool epsilon_on_replace,
    349                          bool no_empty_fst);
    350 uint64 ReverseProperties(uint64 inprops);
    351 uint64 ReweightProperties(uint64 inprops);
    352 uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
    353 uint64 ShortestPathProperties(uint64 props);
    354 uint64 SynchronizeProperties(uint64 inprops);
    355 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);
    356 
    357 // Definitions of inlined functions.
    358 
    359 uint64 SetStartProperties(uint64 inprops) {
    360   uint64 outprops = inprops & kSetStartProperties;
    361   if (inprops & kAcyclic) {
    362     outprops |= kInitialAcyclic;
    363   }
    364   return outprops;
    365 }
    366 
    367 uint64 AddStateProperties(uint64 inprops) {
    368   return inprops & kAddStateProperties;
    369 }
    370 
    371 uint64 DeleteStatesProperties(uint64 inprops) {
    372   return inprops & kDeleteStatesProperties;
    373 }
    374 
    375 uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) {
    376   uint64 outprops = inprops & kError;
    377   return outprops | kNullProperties | staticprops;
    378 }
    379 
    380 uint64 DeleteArcsProperties(uint64 inprops) {
    381   return inprops & kDeleteArcsProperties;
    382 }
    383 
    384 // Definitions of template functions.
    385 
    386 //
    387 template <typename Weight>
    388 uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
    389                           Weight new_weight) {
    390   uint64 outprops = inprops;
    391   if (old_weight != Weight::Zero() && old_weight != Weight::One()) {
    392     outprops &= ~kWeighted;
    393   }
    394   if (new_weight != Weight::Zero() && new_weight != Weight::One()) {
    395     outprops |= kWeighted;
    396     outprops &= ~kUnweighted;
    397   }
    398   outprops &= kSetFinalProperties | kWeighted | kUnweighted;
    399   return outprops;
    400 }
    401 
    402 /// Gets the properties for the MutableFst::AddArc method.
    403 ///
    404 /// \param inprops  the current properties of the fst
    405 /// \param s        the id of the state to which an arc is being added
    406 /// \param arc      the arc being added to the state with the specified id
    407 /// \param prev_arc the previously-added (or "last") arc of state s, or NULL if
    408 ///                 s currently has no arcs
    409 template <typename A>
    410 uint64 AddArcProperties(uint64 inprops, typename A::StateId s,
    411                         const A &arc, const A *prev_arc) {
    412   uint64 outprops = inprops;
    413   if (arc.ilabel != arc.olabel) {
    414     outprops |= kNotAcceptor;
    415     outprops &= ~kAcceptor;
    416   }
    417   if (arc.ilabel == 0) {
    418     outprops |= kIEpsilons;
    419     outprops &= ~kNoIEpsilons;
    420     if (arc.olabel == 0) {
    421       outprops |= kEpsilons;
    422       outprops &= ~kNoEpsilons;
    423     }
    424   }
    425   if (arc.olabel == 0) {
    426     outprops |= kOEpsilons;
    427     outprops &= ~kNoOEpsilons;
    428   }
    429   if (prev_arc != 0) {
    430     if (prev_arc->ilabel > arc.ilabel) {
    431       outprops |= kNotILabelSorted;
    432       outprops &= ~kILabelSorted;
    433     }
    434     if (prev_arc->olabel > arc.olabel) {
    435       outprops |= kNotOLabelSorted;
    436       outprops &= ~kOLabelSorted;
    437     }
    438   }
    439   if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) {
    440     outprops |= kWeighted;
    441     outprops &= ~kUnweighted;
    442   }
    443   if (arc.nextstate <= s) {
    444     outprops |= kNotTopSorted;
    445     outprops &= ~kTopSorted;
    446   }
    447   outprops &= kAddArcProperties | kAcceptor |
    448               kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
    449               kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted;
    450   if (outprops & kTopSorted) {
    451     outprops |= kAcyclic | kInitialAcyclic;
    452   }
    453   return outprops;
    454 }
    455 
    456 extern const char *PropertyNames[];
    457 
    458 }  // namespace fst
    459 
    460 #endif  // FST_LIB_PROPERTIES_H__
    461