Home | History | Annotate | Download | only in lib
      1 // properties.cc
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Functions for updating property bits for various FST operations and
     20 // string names of the properties.
     21 
     22 #include <fst/properties.h>
     23 
     24 #include <stddef.h>
     25 #include <vector>
     26 using std::vector;
     27 
     28 namespace fst {
     29 
     30 // These functions determine the properties associated with the FST
     31 // result of various finite-state operations. The property arguments
     32 // correspond to the operation's FST arguments. The properties
     33 // returned assume the operation modifies its first argument.
     34 // Bitwise-and this result with kCopyProperties for the case when a
     35 // new (possibly delayed) FST is instead constructed.
     36 
     37 // Properties for a concatenatively-closed FST.
     38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
     39   uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
     40   if (!delayed)
     41        outprops |= (kExpanded | kMutable | kCoAccessible |
     42                     kNotTopSorted | kNotString) & inprops;
     43   if (!delayed || inprops & kAccessible)
     44     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
     45                  kNotILabelSorted | kNotOLabelSorted | kWeighted |
     46                  kNotAccessible | kNotCoAccessible) & inprops;
     47   return outprops;
     48 }
     49 
     50 // Properties for a complemented FST.
     51 uint64 ComplementProperties(uint64 inprops) {
     52   uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
     53                     kNoIEpsilons | kNoOEpsilons |
     54                     kIDeterministic | kODeterministic | kAccessible;
     55   outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
     56       inprops;
     57   if (inprops & kAccessible)
     58     outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
     59   return outprops;
     60 }
     61 
     62 // Properties for a composed FST.
     63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
     64   uint64 outprops = kError & (inprops1 | inprops2);
     65   if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
     66     outprops |= kAcceptor | kAccessible;
     67     outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
     68                  kInitialAcyclic) & inprops1 & inprops2;
     69     if (kNoIEpsilons & inprops1 & inprops2)
     70       outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
     71   } else {
     72     outprops |= kAccessible;
     73     outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
     74         inprops1 & inprops2;
     75     if (kNoIEpsilons & inprops1 & inprops2)
     76       outprops |= kIDeterministic & inprops1 & inprops2;
     77   }
     78   return outprops;
     79 }
     80 
     81 // Properties for a concatenated FST.
     82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
     83   uint64 outprops =
     84     (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
     85   outprops |= kError & (inprops1 | inprops2);
     86 
     87   bool empty1 = delayed;  // Can fst1 be the empty machine?
     88   bool empty2 = delayed;  // Can fst2 be the empty machine?
     89 
     90   if (!delayed) {
     91     outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
     92     outprops |= (kNotTopSorted | kNotString) & inprops2;
     93   }
     94   if (!empty1)
     95     outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
     96   if (!delayed || inprops1 & kAccessible)
     97     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
     98                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
     99                  kNotOLabelSorted | kWeighted | kCyclic |
    100                  kNotAccessible | kNotCoAccessible) & inprops1;
    101   if ((inprops1 & (kAccessible | kCoAccessible)) ==
    102       (kAccessible | kCoAccessible) && !empty1) {
    103     outprops |= kAccessible & inprops2;
    104     if (!empty2)
    105       outprops |= kCoAccessible & inprops2;
    106     if (!delayed || inprops2 & kAccessible)
    107       outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    108                    kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    109                    kNotOLabelSorted | kWeighted | kCyclic |
    110                    kNotAccessible | kNotCoAccessible) & inprops2;
    111   }
    112   return outprops;
    113 }
    114 
    115 // Properties for a determinized FST.
    116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
    117   uint64 outprops = kAccessible;
    118   if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
    119     outprops |= kIDeterministic;
    120   outprops |= (kError | kAcceptor | kAcyclic |
    121                kInitialAcyclic | kCoAccessible | kString) & inprops;
    122   if (inprops & kNoIEpsilons)
    123     outprops |= kNoEpsilons & inprops;
    124   if (inprops & kAccessible)
    125      outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
    126                   kCyclic) & inprops;
    127   if (inprops & kAcceptor)
    128     outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
    129   if ((inprops & kNoIEpsilons) && has_subsequential_label)
    130     outprops |= kNoIEpsilons;
    131   return outprops;
    132 }
    133 
    134 // Properties for factored weight FST.
    135 uint64 FactorWeightProperties(uint64 inprops) {
    136   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
    137                      kAcyclic | kAccessible | kCoAccessible) & inprops;
    138   if (inprops & kAccessible)
    139     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    140                  kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
    141                  kNotILabelSorted | kNotOLabelSorted)
    142         & inprops;
    143   return outprops;
    144 }
    145 
    146 // Properties for an inverted FST.
    147 uint64 InvertProperties(uint64 inprops) {
    148   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    149                      kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
    150                      kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
    151                      kTopSorted | kNotTopSorted |
    152                      kAccessible | kNotAccessible |
    153                      kCoAccessible | kNotCoAccessible |
    154                      kString | kNotString) & inprops;
    155   if (kIDeterministic & inprops)
    156     outprops |= kODeterministic;
    157   if (kNonIDeterministic & inprops)
    158     outprops |= kNonODeterministic;
    159   if (kODeterministic & inprops)
    160     outprops |= kIDeterministic;
    161   if (kNonODeterministic & inprops)
    162     outprops |= kNonIDeterministic;
    163 
    164   if (kIEpsilons & inprops)
    165     outprops |= kOEpsilons;
    166   if (kNoIEpsilons & inprops)
    167     outprops |= kNoOEpsilons;
    168   if (kOEpsilons & inprops)
    169     outprops |= kIEpsilons;
    170   if (kNoOEpsilons & inprops)
    171     outprops |= kNoIEpsilons;
    172 
    173   if (kILabelSorted & inprops)
    174     outprops |= kOLabelSorted;
    175   if (kNotILabelSorted & inprops)
    176     outprops |= kNotOLabelSorted;
    177   if (kOLabelSorted & inprops)
    178     outprops |= kILabelSorted;
    179   if (kNotOLabelSorted & inprops)
    180     outprops |= kNotILabelSorted;
    181   return outprops;
    182 }
    183 
    184 // Properties for a projected FST.
    185 uint64 ProjectProperties(uint64 inprops, bool project_input) {
    186   uint64 outprops = kAcceptor;
    187   outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
    188                kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
    189                kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
    190                kCoAccessible | kNotCoAccessible |
    191                kString | kNotString) & inprops;
    192   if (project_input) {
    193     outprops |= (kIDeterministic | kNonIDeterministic |
    194                  kIEpsilons | kNoIEpsilons |
    195                  kILabelSorted | kNotILabelSorted) & inprops;
    196 
    197     if (kIDeterministic & inprops)
    198       outprops |= kODeterministic;
    199     if (kNonIDeterministic & inprops)
    200       outprops |= kNonODeterministic;
    201 
    202     if (kIEpsilons & inprops)
    203       outprops |= kOEpsilons | kEpsilons;
    204     if (kNoIEpsilons & inprops)
    205       outprops |= kNoOEpsilons | kNoEpsilons;
    206 
    207     if (kILabelSorted & inprops)
    208       outprops |= kOLabelSorted;
    209     if (kNotILabelSorted & inprops)
    210       outprops |= kNotOLabelSorted;
    211   } else {
    212     outprops |= (kODeterministic | kNonODeterministic |
    213                  kOEpsilons | kNoOEpsilons |
    214                  kOLabelSorted | kNotOLabelSorted) & inprops;
    215 
    216     if (kODeterministic & inprops)
    217       outprops |= kIDeterministic;
    218     if (kNonODeterministic & inprops)
    219       outprops |= kNonIDeterministic;
    220 
    221     if (kOEpsilons & inprops)
    222       outprops |= kIEpsilons | kEpsilons;
    223     if (kNoOEpsilons & inprops)
    224       outprops |= kNoIEpsilons | kNoEpsilons;
    225 
    226     if (kOLabelSorted & inprops)
    227       outprops |= kILabelSorted;
    228     if (kNotOLabelSorted & inprops)
    229       outprops |= kNotILabelSorted;
    230   }
    231   return outprops;
    232 }
    233 
    234 // Properties for a randgen FST.
    235 uint64 RandGenProperties(uint64 inprops, bool weighted) {
    236   uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
    237   outprops |= inprops & kError;
    238   if (weighted) {
    239     outprops |= kTopSorted;
    240     outprops |= (kAcceptor | kNoEpsilons |
    241                  kNoIEpsilons | kNoOEpsilons |
    242                  kIDeterministic | kODeterministic |
    243                  kILabelSorted | kOLabelSorted) & inprops;
    244   } else {
    245     outprops |= kUnweighted;
    246     outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
    247   }
    248   return outprops;
    249 }
    250 
    251 // Properties for a replace FST.
    252 uint64 ReplaceProperties(const vector<uint64>& inprops,
    253                          ssize_t root,
    254                          bool epsilon_on_replace,
    255                          bool no_empty_fsts) {
    256   if (inprops.size() == 0)
    257     return kNullProperties;
    258   uint64 outprops = 0;
    259   for (size_t i = 0; i < inprops.size(); ++i)
    260     outprops |= kError & inprops[i];
    261   uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
    262   for (size_t i = 0; i < inprops.size(); ++i)
    263     access_props &= (inprops[i] & (kAccessible | kCoAccessible));
    264   if (access_props == (kAccessible | kCoAccessible)) {
    265     outprops |= access_props;
    266     if (inprops[root] & kInitialCyclic)
    267       outprops |= kInitialCyclic;
    268     uint64 props =  0;
    269     bool string = true;
    270     for (size_t i = 0; i < inprops.size(); ++i) {
    271       if (epsilon_on_replace == false)
    272         props |= kNotAcceptor & inprops[i];
    273       props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
    274                 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
    275                 kNotTopSorted | kNotString) & inprops[i];
    276       if (!(inprops[i] & kString))
    277         string = false;
    278     }
    279     outprops |= props;
    280     if (string)
    281       outprops |= kString;
    282   }
    283   bool acceptor = epsilon_on_replace;
    284   bool ideterministic = !epsilon_on_replace;
    285   bool no_iepsilons = !epsilon_on_replace;
    286   bool acyclic = true;
    287   bool unweighted = true;
    288   for (size_t i = 0; i < inprops.size(); ++i) {
    289     if (!(inprops[i] & kAcceptor))
    290       acceptor = false;
    291     if (!(inprops[i] & kIDeterministic))
    292       ideterministic = false;
    293     if (!(inprops[i] & kNoIEpsilons))
    294       no_iepsilons = false;
    295     if (!(inprops[i] & kAcyclic))
    296       acyclic = false;
    297     if (!(inprops[i] & kUnweighted))
    298       unweighted = false;
    299   }
    300   if (acceptor)
    301     outprops |= kAcceptor;
    302   if (ideterministic)
    303     outprops |= kIDeterministic;
    304   if (no_iepsilons)
    305     outprops |= kNoIEpsilons;
    306   if (acyclic)
    307     outprops |= kAcyclic;
    308   if (unweighted)
    309     outprops |= kUnweighted;
    310   if (inprops[root] & kInitialAcyclic)
    311       outprops |= kInitialAcyclic;
    312   return outprops;
    313 }
    314 
    315 // Properties for a relabeled FST.
    316 uint64 RelabelProperties(uint64 inprops) {
    317   uint64 outprops = (kExpanded | kMutable | kError |
    318                      kWeighted | kUnweighted |
    319                      kCyclic | kAcyclic |
    320                      kInitialCyclic | kInitialAcyclic |
    321                      kTopSorted | kNotTopSorted |
    322                      kAccessible | kNotAccessible |
    323                      kCoAccessible | kNotCoAccessible |
    324                      kString | kNotString) & inprops;
    325   return outprops;
    326 }
    327 
    328 // Properties for a reversed FST. (the superinitial state limits this set)
    329 uint64 ReverseProperties(uint64 inprops) {
    330   uint64 outprops =
    331     (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
    332      kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
    333      kCyclic | kAcyclic) & inprops;
    334   return outprops;
    335 }
    336 
    337 // Properties for re-weighted FST.
    338 uint64 ReweightProperties(uint64 inprops) {
    339   uint64 outprops = inprops & kWeightInvariantProperties;
    340   outprops = outprops & ~kCoAccessible;
    341   return outprops;
    342 }
    343 
    344 // Properties for an epsilon-removed FST.
    345 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
    346   uint64 outprops = kNoEpsilons;
    347   outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
    348   if (inprops & kAcceptor)
    349     outprops |= kNoIEpsilons | kNoOEpsilons;
    350   if (!delayed) {
    351     outprops |= kExpanded | kMutable;
    352     outprops |= kTopSorted & inprops;
    353   }
    354   if (!delayed || inprops & kAccessible)
    355     outprops |= kNotAcceptor & inprops;
    356   return outprops;
    357 }
    358 
    359 // Properties for shortest path. This function computes how the properties
    360 // of the output of shortest path need to be updated, given that 'props' is
    361 // already known.
    362 uint64 ShortestPathProperties(uint64 props) {
    363   return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
    364 }
    365 
    366 // Properties for a synchronized FST.
    367 uint64 SynchronizeProperties(uint64 inprops) {
    368   uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
    369                      kCoAccessible | kUnweighted) & inprops;
    370   if (inprops & kAccessible)
    371     outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
    372   return outprops;
    373 }
    374 
    375 // Properties for a unioned FST.
    376 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
    377   uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
    378                     & inprops1 & inprops2;
    379   outprops |= kError & (inprops1 | inprops2);
    380   outprops |= kInitialAcyclic;
    381 
    382   bool empty1 = delayed;  // Can fst1 be the empty machine?
    383   bool empty2 = delayed;  // Can fst2 be the empty machine?
    384   if (!delayed) {
    385     outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
    386     outprops |= (kNotTopSorted | kNotString) & inprops2;
    387   }
    388   if (!empty1 && !empty2) {
    389     outprops |= kEpsilons | kIEpsilons | kOEpsilons;
    390     outprops |= kCoAccessible & inprops1 & inprops2;
    391   }
    392   // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
    393   if (!delayed || inprops1 & kAccessible)
    394     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    395                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    396                  kNotOLabelSorted | kWeighted | kCyclic |
    397                  kNotAccessible) & inprops1;
    398   if (!delayed || inprops2 & kAccessible)
    399     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    400                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    401                  kNotOLabelSorted | kWeighted | kCyclic |
    402                  kNotAccessible | kNotCoAccessible) & inprops2;
    403   return outprops;
    404 }
    405 
    406 
    407 // Property string names (indexed by bit position).
    408 const char *PropertyNames[] = {
    409   // binary
    410   "expanded", "mutable", "error", "", "", "", "", "",
    411   "", "", "", "", "", "", "", "",
    412   // trinary
    413   "acceptor", "not acceptor",
    414   "input deterministic", "non input deterministic",
    415   "output deterministic", "non output deterministic",
    416   "input/output epsilons", "no input/output epsilons",
    417   "input epsilons", "no input epsilons",
    418   "output epsilons", "no output epsilons",
    419   "input label sorted", "not input label sorted",
    420   "output label sorted", "not output label sorted",
    421   "weighted", "unweighted",
    422   "cyclic", "acyclic",
    423   "cyclic at initial state", "acyclic at initial state",
    424   "top sorted", "not top sorted",
    425   "accessible", "not accessible",
    426   "coaccessible", "not coaccessible",
    427   "string", "not string",
    428 };
    429 
    430 }  // namespace fst
    431