Home | History | Annotate | Download | only in lib
      1 // properties.cc
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Functions for updating property bits for various FST operations and
     20 // string names of the properties.
     21 
     22 #include <fst/properties.h>
     23 
     24 #include <stddef.h>
     25 #include <vector>
     26 using std::vector;
     27 
     28 namespace fst {
     29 
     30 // These functions determine the properties associated with the FST
     31 // result of various finite-state operations. The property arguments
     32 // correspond to the operation's FST arguments. The properties
     33 // returned assume the operation modifies its first argument.
     34 // Bitwise-and this result with kCopyProperties for the case when a
     35 // new (possibly delayed) FST is instead constructed.
     36 
     37 // Properties for a concatenatively-closed FST.
     38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
     39   uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
     40   if (!delayed)
     41        outprops |= (kExpanded | kMutable | kCoAccessible |
     42                     kNotTopSorted | kNotString) & inprops;
     43   if (!delayed || inprops & kAccessible)
     44     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
     45                  kNotILabelSorted | kNotOLabelSorted | kWeighted |
     46                  kNotAccessible | kNotCoAccessible) & inprops;
     47   return outprops;
     48 }
     49 
     50 // Properties for a complemented FST.
     51 uint64 ComplementProperties(uint64 inprops) {
     52   uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
     53                     kNoIEpsilons | kNoOEpsilons |
     54                     kIDeterministic | kODeterministic | kAccessible;
     55   outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
     56       inprops;
     57   if (inprops & kAccessible)
     58     outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
     59   return outprops;
     60 }
     61 
     62 // Properties for a composed FST.
     63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
     64   uint64 outprops = kError & (inprops1 | inprops2);
     65   if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
     66     outprops |= kAcceptor | kAccessible;
     67     outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
     68                  kInitialAcyclic) & inprops1 & inprops2;
     69     if (kNoIEpsilons & inprops1 & inprops2)
     70       outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
     71   } else {
     72     outprops |= kAccessible;
     73     outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
     74         inprops1 & inprops2;
     75     if (kNoIEpsilons & inprops1 & inprops2)
     76       outprops |= kIDeterministic & inprops1 & inprops2;
     77   }
     78   return outprops;
     79 }
     80 
     81 // Properties for a concatenated FST.
     82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
     83   uint64 outprops =
     84     (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
     85   outprops |= kError & (inprops1 | inprops2);
     86 
     87   bool empty1 = delayed;  // Can fst1 be the empty machine?
     88   bool empty2 = delayed;  // Can fst2 be the empty machine?
     89 
     90   if (!delayed) {
     91     outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
     92     outprops |= (kNotTopSorted | kNotString) & inprops2;
     93   }
     94   if (!empty1)
     95     outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
     96   if (!delayed || inprops1 & kAccessible)
     97     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
     98                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
     99                  kNotOLabelSorted | kWeighted | kCyclic |
    100                  kNotAccessible | kNotCoAccessible) & inprops1;
    101   if ((inprops1 & (kAccessible | kCoAccessible)) ==
    102       (kAccessible | kCoAccessible) && !empty1) {
    103     outprops |= kAccessible & inprops2;
    104     if (!empty2)
    105       outprops |= kCoAccessible & inprops2;
    106     if (!delayed || inprops2 & kAccessible)
    107       outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    108                    kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    109                    kNotOLabelSorted | kWeighted | kCyclic |
    110                    kNotAccessible | kNotCoAccessible) & inprops2;
    111   }
    112   return outprops;
    113 }
    114 
    115 // Properties for a determinized FST.
    116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
    117   uint64 outprops = kAccessible;
    118   if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
    119     outprops |= kIDeterministic;
    120   outprops |= (kError | kAcceptor | kNoEpsilons | kAcyclic |
    121                kInitialAcyclic | kCoAccessible | kString) & inprops;
    122   if (inprops & kAccessible)
    123      outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
    124                   kCyclic) & inprops;
    125   if (inprops & kAcceptor)
    126     outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
    127   if ((inprops & kNoIEpsilons) && has_subsequential_label)
    128     outprops |= kNoIEpsilons;
    129   return outprops;
    130 }
    131 
    132 // Properties for factored weight FST.
    133 uint64 FactorWeightProperties(uint64 inprops) {
    134   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
    135                      kAcyclic | kAccessible | kCoAccessible) & inprops;
    136   if (inprops & kAccessible)
    137     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    138                  kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
    139                  kNotILabelSorted | kNotOLabelSorted)
    140         & inprops;
    141   return outprops;
    142 }
    143 
    144 // Properties for an inverted FST.
    145 uint64 InvertProperties(uint64 inprops) {
    146   uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    147                      kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
    148                      kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
    149                      kTopSorted | kNotTopSorted |
    150                      kAccessible | kNotAccessible |
    151                      kCoAccessible | kNotCoAccessible |
    152                      kString | kNotString) & inprops;
    153   if (kIDeterministic & inprops)
    154     outprops |= kODeterministic;
    155   if (kNonIDeterministic & inprops)
    156     outprops |= kNonODeterministic;
    157   if (kODeterministic & inprops)
    158     outprops |= kIDeterministic;
    159   if (kNonODeterministic & inprops)
    160     outprops |= kNonIDeterministic;
    161 
    162   if (kIEpsilons & inprops)
    163     outprops |= kOEpsilons;
    164   if (kNoIEpsilons & inprops)
    165     outprops |= kNoOEpsilons;
    166   if (kOEpsilons & inprops)
    167     outprops |= kIEpsilons;
    168   if (kNoOEpsilons & inprops)
    169     outprops |= kNoIEpsilons;
    170 
    171   if (kILabelSorted & inprops)
    172     outprops |= kOLabelSorted;
    173   if (kNotILabelSorted & inprops)
    174     outprops |= kNotOLabelSorted;
    175   if (kOLabelSorted & inprops)
    176     outprops |= kILabelSorted;
    177   if (kNotOLabelSorted & inprops)
    178     outprops |= kNotILabelSorted;
    179   return outprops;
    180 }
    181 
    182 // Properties for a projected FST.
    183 uint64 ProjectProperties(uint64 inprops, bool project_input) {
    184   uint64 outprops = kAcceptor;
    185   outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
    186                kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
    187                kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
    188                kCoAccessible | kNotCoAccessible |
    189                kString | kNotString) & inprops;
    190   if (project_input) {
    191     outprops |= (kIDeterministic | kNonIDeterministic |
    192                  kIEpsilons | kNoIEpsilons |
    193                  kILabelSorted | kNotILabelSorted) & inprops;
    194 
    195     if (kIDeterministic & inprops)
    196       outprops |= kODeterministic;
    197     if (kNonIDeterministic & inprops)
    198       outprops |= kNonODeterministic;
    199 
    200     if (kIEpsilons & inprops)
    201       outprops |= kOEpsilons | kEpsilons;
    202     if (kNoIEpsilons & inprops)
    203       outprops |= kNoOEpsilons | kNoEpsilons;
    204 
    205     if (kILabelSorted & inprops)
    206       outprops |= kOLabelSorted;
    207     if (kNotILabelSorted & inprops)
    208       outprops |= kNotOLabelSorted;
    209   } else {
    210     outprops |= (kODeterministic | kNonODeterministic |
    211                  kOEpsilons | kNoOEpsilons |
    212                  kOLabelSorted | kNotOLabelSorted) & inprops;
    213 
    214     if (kODeterministic & inprops)
    215       outprops |= kIDeterministic;
    216     if (kNonODeterministic & inprops)
    217       outprops |= kNonIDeterministic;
    218 
    219     if (kOEpsilons & inprops)
    220       outprops |= kIEpsilons | kEpsilons;
    221     if (kNoOEpsilons & inprops)
    222       outprops |= kNoIEpsilons | kNoEpsilons;
    223 
    224     if (kOLabelSorted & inprops)
    225       outprops |= kILabelSorted;
    226     if (kNotOLabelSorted & inprops)
    227       outprops |= kNotILabelSorted;
    228   }
    229   return outprops;
    230 }
    231 
    232 // Properties for a randgen FST.
    233 uint64 RandGenProperties(uint64 inprops, bool weighted) {
    234   uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
    235   outprops |= inprops & kError;
    236   if (weighted) {
    237     outprops |= kTopSorted;
    238     outprops |= (kAcceptor | kNoEpsilons |
    239                  kNoIEpsilons | kNoOEpsilons |
    240                  kIDeterministic | kODeterministic |
    241                  kILabelSorted | kOLabelSorted) & inprops;
    242   } else {
    243     outprops |= kUnweighted;
    244     outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
    245   }
    246   return outprops;
    247 }
    248 
    249 // Properties for a replace FST.
    250 uint64 ReplaceProperties(const vector<uint64>& inprops,
    251                          ssize_t root,
    252                          bool epsilon_on_replace,
    253                          bool no_empty_fsts) {
    254   if (inprops.size() == 0)
    255     return kNullProperties;
    256   uint64 outprops = 0;
    257   for (size_t i = 0; i < inprops.size(); ++i)
    258     outprops |= kError & inprops[i];
    259   uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
    260   for (size_t i = 0; i < inprops.size(); ++i)
    261     access_props &= (inprops[i] & (kAccessible | kCoAccessible));
    262   if (access_props == (kAccessible | kCoAccessible)) {
    263     outprops |= access_props;
    264     if (inprops[root] & kInitialCyclic)
    265       outprops |= kInitialCyclic;
    266     uint64 props =  0;
    267     bool string = true;
    268     for (size_t i = 0; i < inprops.size(); ++i) {
    269       if (epsilon_on_replace == false)
    270         props |= kNotAcceptor & inprops[i];
    271       props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
    272                 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
    273                 kNotTopSorted | kNotString) & inprops[i];
    274       if (!(inprops[i] & kString))
    275         string = false;
    276     }
    277     outprops |= props;
    278     if (string)
    279       outprops |= kString;
    280   }
    281   bool acceptor = epsilon_on_replace;
    282   bool ideterministic = !epsilon_on_replace;
    283   bool no_iepsilons = !epsilon_on_replace;
    284   bool acyclic = true;
    285   bool unweighted = true;
    286   for (size_t i = 0; i < inprops.size(); ++i) {
    287     if (!(inprops[i] & kAcceptor))
    288       acceptor = false;
    289     if (!(inprops[i] & kIDeterministic))
    290       ideterministic = false;
    291     if (!(inprops[i] & kNoIEpsilons))
    292       no_iepsilons = false;
    293     if (!(inprops[i] & kAcyclic))
    294       acyclic = false;
    295     if (!(inprops[i] & kUnweighted))
    296       unweighted = false;
    297   }
    298   if (acceptor)
    299     outprops |= kAcceptor;
    300   if (ideterministic)
    301     outprops |= kIDeterministic;
    302   if (no_iepsilons)
    303     outprops |= kNoIEpsilons;
    304   if (acyclic)
    305     outprops |= kAcyclic;
    306   if (unweighted)
    307     outprops |= kUnweighted;
    308   if (inprops[root] & kInitialAcyclic)
    309       outprops |= kInitialAcyclic;
    310   return outprops;
    311 }
    312 
    313 // Properties for a relabeled FST.
    314 uint64 RelabelProperties(uint64 inprops) {
    315   uint64 outprops = (kExpanded | kMutable | kError |
    316                      kWeighted | kUnweighted |
    317                      kCyclic | kAcyclic |
    318                      kInitialCyclic | kInitialAcyclic |
    319                      kTopSorted | kNotTopSorted |
    320                      kAccessible | kNotAccessible |
    321                      kCoAccessible | kNotCoAccessible |
    322                      kString | kNotString) & inprops;
    323   return outprops;
    324 }
    325 
    326 // Properties for a reversed FST. (the superinitial state limits this set)
    327 uint64 ReverseProperties(uint64 inprops) {
    328   uint64 outprops =
    329     (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
    330      kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
    331      kCyclic | kAcyclic) & inprops;
    332   return outprops;
    333 }
    334 
    335 // Properties for re-weighted FST.
    336 uint64 ReweightProperties(uint64 inprops) {
    337   uint64 outprops = inprops & kWeightInvariantProperties;
    338   outprops = outprops & ~kCoAccessible;
    339   return outprops;
    340 }
    341 
    342 // Properties for an epsilon-removed FST.
    343 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
    344   uint64 outprops = kNoEpsilons;
    345   outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
    346   if (inprops & kAcceptor)
    347     outprops |= kNoIEpsilons | kNoOEpsilons;
    348   if (!delayed) {
    349     outprops |= kExpanded | kMutable;
    350     outprops |= kTopSorted & inprops;
    351   }
    352   if (!delayed || inprops & kAccessible)
    353     outprops |= kNotAcceptor & inprops;
    354   return outprops;
    355 }
    356 
    357 // Properties for shortest path. This function computes how the properties
    358 // of the output of shortest path need to be updated, given that 'props' is
    359 // already known.
    360 uint64 ShortestPathProperties(uint64 props) {
    361   return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
    362 }
    363 
    364 // Properties for a synchronized FST.
    365 uint64 SynchronizeProperties(uint64 inprops) {
    366   uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
    367                      kCoAccessible | kUnweighted) & inprops;
    368   if (inprops & kAccessible)
    369     outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
    370   return outprops;
    371 }
    372 
    373 // Properties for a unioned FST.
    374 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
    375   uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
    376                     & inprops1 & inprops2;
    377   outprops |= kError & (inprops1 | inprops2);
    378 
    379   bool empty1 = delayed;  // Can fst1 be the empty machine?
    380   bool empty2 = delayed;  // Can fst2 be the empty machine?
    381   if (!delayed) {
    382     outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
    383     outprops |= (kNotTopSorted | kNotString) & inprops2;
    384   }
    385   if (!empty1 && !empty2) {
    386     outprops |= kEpsilons | kIEpsilons | kOEpsilons;
    387     outprops |= kCoAccessible & inprops1 & inprops2;
    388   }
    389   // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
    390   if (!delayed || inprops1 & kAccessible)
    391     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    392                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    393                  kNotOLabelSorted | kWeighted | kCyclic |
    394                  kNotAccessible) & inprops1;
    395   if (!delayed || inprops2 & kAccessible)
    396     outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    397                  kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
    398                  kNotOLabelSorted | kWeighted | kCyclic |
    399                  kNotAccessible | kNotCoAccessible) & inprops2;
    400   return outprops;
    401 }
    402 
    403 
    404 // Property string names (indexed by bit position).
    405 const char *PropertyNames[] = {
    406   // binary
    407   "expanded", "mutable", "error", "", "", "", "", "",
    408   "", "", "", "", "", "", "", "",
    409   // trinary
    410   "acceptor", "not acceptor",
    411   "input deterministic", "non input deterministic",
    412   "output deterministic", "non output deterministic",
    413   "input/output epsilons", "no input/output epsilons",
    414   "input epsilons", "no input epsilons",
    415   "output epsilons", "no output epsilons",
    416   "input label sorted", "not input label sorted",
    417   "output label sorted", "not output label sorted",
    418   "weighted", "unweighted",
    419   "cyclic", "acyclic",
    420   "cyclic at initial state", "acyclic at initial state",
    421   "top sorted", "not top sorted",
    422   "accessible", "not accessible",
    423   "coaccessible", "not coaccessible",
    424   "string", "not string",
    425 };
    426 
    427 }  // namespace fst
    428