Home | History | Annotate | Download | only in lib
      1 // fst.cc
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // FST definitions.
     20 
     21 #include <fst/fst.h>
     22 
     23 // Include these so they are registered
     24 #include <fst/compact-fst.h>
     25 #include <fst/const-fst.h>
     26 #include <fst/matcher-fst.h>
     27 #include <fst/vector-fst.h>
     28 #include <fst/edit-fst.h>
     29 
     30 // FST flag definitions
     31 
     32 DEFINE_bool(fst_verify_properties, false,
     33             "Verify fst properties queried by TestProperties");
     34 
     35 DEFINE_string(fst_weight_separator, ",",
     36               "Character separator between printed composite weights; "
     37               "must be a single character");
     38 
     39 DEFINE_string(fst_weight_parentheses, "",
     40               "Characters enclosing the first weight of a printed composite "
     41               "weight (e.g. pair weight, tuple weight and derived classes) to "
     42               "ensure proper I/O of nested composite weights; "
     43               "must have size 0 (none) or 2 (open and close parenthesis)");
     44 
     45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
     46 
     47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
     48              "Cache byte size that triggers garbage collection");
     49 
     50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
     51 
     52 DEFINE_string(save_relabel_ipairs, "",  "Save input relabel pairs to file");
     53 DEFINE_string(save_relabel_opairs, "",  "Save output relabel pairs to file");
     54 
     55 namespace fst {
     56 
     57 // Register VectorFst, ConstFst and EditFst for common arcs types
     58 REGISTER_FST(VectorFst, StdArc);
     59 REGISTER_FST(VectorFst, LogArc);
     60 REGISTER_FST(VectorFst, Log64Arc);
     61 REGISTER_FST(ConstFst, StdArc);
     62 REGISTER_FST(ConstFst, LogArc);
     63 REGISTER_FST(ConstFst, Log64Arc);
     64 REGISTER_FST(EditFst, StdArc);
     65 REGISTER_FST(EditFst, LogArc);
     66 REGISTER_FST(EditFst, Log64Arc);
     67 
     68 // Register CompactFst for common arcs with the default (uint32) size type
     69 static FstRegisterer<
     70   CompactFst<StdArc, StringCompactor<StdArc> > >
     71 CompactFst_StdArc_StringCompactor_registerer;
     72 static FstRegisterer<
     73   CompactFst<LogArc, StringCompactor<LogArc> > >
     74 CompactFst_LogArc_StringCompactor_registerer;
     75 static FstRegisterer<
     76   CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
     77 CompactFst_StdArc_WeightedStringCompactor_registerer;
     78 static FstRegisterer<
     79   CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
     80 CompactFst_LogArc_WeightedStringCompactor_registerer;
     81 static FstRegisterer<
     82   CompactFst<StdArc, AcceptorCompactor<StdArc> > >
     83 CompactFst_StdArc_AcceptorCompactor_registerer;
     84 static FstRegisterer<
     85   CompactFst<LogArc, AcceptorCompactor<LogArc> > >
     86 CompactFst_LogArc_AcceptorCompactor_registerer;
     87 static FstRegisterer<
     88   CompactFst<StdArc, UnweightedCompactor<StdArc> > >
     89 CompactFst_StdArc_UnweightedCompactor_registerer;
     90 static FstRegisterer<
     91   CompactFst<LogArc, UnweightedCompactor<LogArc> > >
     92 CompactFst_LogArc_UnweightedCompactor_registerer;
     93 static FstRegisterer<
     94   CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
     95 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
     96 static FstRegisterer<
     97   CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
     98 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
     99 
    100 // Fst type definitions for lookahead Fsts.
    101 extern const char arc_lookahead_fst_type[] = "arc_lookahead";
    102 extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
    103 extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
    104 
    105 // Identifies stream data as an FST (and its endianity)
    106 static const int32 kFstMagicNumber = 2125659606;
    107 
    108 // Check for Fst magic number in stream, to indicate
    109 // caller function that the stream content is an Fst header;
    110 bool IsFstHeader(istream &strm, const string &source) {
    111   int64 pos = strm.tellg();
    112   bool match = true;
    113   int32 magic_number = 0;
    114   ReadType(strm, &magic_number);
    115   if (magic_number != kFstMagicNumber
    116       ) {
    117     match = false;
    118   }
    119   strm.seekg(pos);
    120   return match;
    121 }
    122 
    123 // Check Fst magic number and read in Fst header.
    124 // If rewind = true, reposition stream to before call (if possible).
    125 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
    126   int64 pos = 0;
    127   if (rewind) pos = strm.tellg();
    128   int32 magic_number = 0;
    129   ReadType(strm, &magic_number);
    130   if (magic_number != kFstMagicNumber
    131       ) {
    132     LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
    133     if (rewind) strm.seekg(pos);
    134     return false;
    135   }
    136 
    137   ReadType(strm, &fsttype_);
    138   ReadType(strm, &arctype_);
    139   ReadType(strm, &version_);
    140   ReadType(strm, &flags_);
    141   ReadType(strm, &properties_);
    142   ReadType(strm, &start_);
    143   ReadType(strm, &numstates_);
    144   ReadType(strm, &numarcs_);
    145   if (!strm) {
    146     LOG(ERROR) << "FstHeader::Read: read failed: " << source;
    147     return false;
    148   }
    149   if (rewind) strm.seekg(pos);
    150   return true;
    151 }
    152 
    153 // Write Fst magic number and Fst header.
    154 bool FstHeader::Write(ostream &strm, const string &source) const {
    155   WriteType(strm, kFstMagicNumber);
    156   WriteType(strm, fsttype_);
    157   WriteType(strm, arctype_);
    158   WriteType(strm, version_);
    159   WriteType(strm, flags_);
    160   WriteType(strm, properties_);
    161   WriteType(strm, start_);
    162   WriteType(strm, numstates_);
    163   WriteType(strm, numarcs_);
    164   return true;
    165 }
    166 
    167 }  // namespace fst
    168