Home | History | Annotate | Download | only in lib
      1 // fst.cc
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // FST definitions.
     20 
     21 #include <fst/fst.h>
     22 
     23 // Include these so they are registered
     24 #include <fst/compact-fst.h>
     25 #include <fst/const-fst.h>
     26 #include <fst/matcher-fst.h>
     27 #include <fst/vector-fst.h>
     28 #include <fst/edit-fst.h>
     29 
     30 // FST flag definitions
     31 
     32 DEFINE_bool(fst_verify_properties, false,
     33             "Verify fst properties queried by TestProperties");
     34 
     35 DEFINE_string(fst_weight_separator, ",",
     36               "Character separator between printed composite weights; "
     37               "must be a single character");
     38 
     39 DEFINE_string(fst_weight_parentheses, "",
     40               "Characters enclosing the first weight of a printed composite "
     41               "weight (e.g. pair weight, tuple weight and derived classes) to "
     42               "ensure proper I/O of nested composite weights; "
     43               "must have size 0 (none) or 2 (open and close parenthesis)");
     44 
     45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
     46 
     47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
     48              "Cache byte size that triggers garbage collection");
     49 
     50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
     51 
     52 DEFINE_string(save_relabel_ipairs, "",  "Save input relabel pairs to file");
     53 DEFINE_string(save_relabel_opairs, "",  "Save output relabel pairs to file");
     54 
     55 DEFINE_string(fst_read_mode, "read",
     56               "Default file reading mode for mappable files");
     57 
     58 namespace fst {
     59 
     60 // Register VectorFst, ConstFst and EditFst for common arcs types
     61 REGISTER_FST(VectorFst, StdArc);
     62 REGISTER_FST(VectorFst, LogArc);
     63 REGISTER_FST(VectorFst, Log64Arc);
     64 REGISTER_FST(ConstFst, StdArc);
     65 REGISTER_FST(ConstFst, LogArc);
     66 REGISTER_FST(ConstFst, Log64Arc);
     67 REGISTER_FST(EditFst, StdArc);
     68 REGISTER_FST(EditFst, LogArc);
     69 REGISTER_FST(EditFst, Log64Arc);
     70 
     71 // Register CompactFst for common arcs with the default (uint32) size type
     72 static FstRegisterer<
     73   CompactFst<StdArc, StringCompactor<StdArc> > >
     74 CompactFst_StdArc_StringCompactor_registerer;
     75 static FstRegisterer<
     76   CompactFst<LogArc, StringCompactor<LogArc> > >
     77 CompactFst_LogArc_StringCompactor_registerer;
     78 static FstRegisterer<
     79   CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
     80 CompactFst_StdArc_WeightedStringCompactor_registerer;
     81 static FstRegisterer<
     82   CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
     83 CompactFst_LogArc_WeightedStringCompactor_registerer;
     84 static FstRegisterer<
     85   CompactFst<StdArc, AcceptorCompactor<StdArc> > >
     86 CompactFst_StdArc_AcceptorCompactor_registerer;
     87 static FstRegisterer<
     88   CompactFst<LogArc, AcceptorCompactor<LogArc> > >
     89 CompactFst_LogArc_AcceptorCompactor_registerer;
     90 static FstRegisterer<
     91   CompactFst<StdArc, UnweightedCompactor<StdArc> > >
     92 CompactFst_StdArc_UnweightedCompactor_registerer;
     93 static FstRegisterer<
     94   CompactFst<LogArc, UnweightedCompactor<LogArc> > >
     95 CompactFst_LogArc_UnweightedCompactor_registerer;
     96 static FstRegisterer<
     97   CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
     98 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
     99 static FstRegisterer<
    100   CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
    101 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
    102 
    103 // Fst type definitions for lookahead Fsts.
    104 extern const char arc_lookahead_fst_type[] = "arc_lookahead";
    105 extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
    106 extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
    107 
    108 // Identifies stream data as an FST (and its endianity)
    109 static const int32 kFstMagicNumber = 2125659606;
    110 
    111 // Check for Fst magic number in stream, to indicate
    112 // caller function that the stream content is an Fst header;
    113 bool IsFstHeader(istream &strm, const string &source) {
    114   int64 pos = strm.tellg();
    115   bool match = true;
    116   int32 magic_number = 0;
    117   ReadType(strm, &magic_number);
    118   if (magic_number != kFstMagicNumber
    119       ) {
    120     match = false;
    121   }
    122   strm.seekg(pos);
    123   return match;
    124 }
    125 
    126 // Check Fst magic number and read in Fst header.
    127 // If rewind = true, reposition stream to before call (if possible).
    128 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
    129   int64 pos = 0;
    130   if (rewind) pos = strm.tellg();
    131   int32 magic_number = 0;
    132   ReadType(strm, &magic_number);
    133   if (magic_number != kFstMagicNumber
    134       ) {
    135     LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
    136     if (rewind) strm.seekg(pos);
    137     return false;
    138   }
    139 
    140   ReadType(strm, &fsttype_);
    141   ReadType(strm, &arctype_);
    142   ReadType(strm, &version_);
    143   ReadType(strm, &flags_);
    144   ReadType(strm, &properties_);
    145   ReadType(strm, &start_);
    146   ReadType(strm, &numstates_);
    147   ReadType(strm, &numarcs_);
    148   if (!strm) {
    149     LOG(ERROR) << "FstHeader::Read: read failed: " << source;
    150     return false;
    151   }
    152   if (rewind) strm.seekg(pos);
    153   return true;
    154 }
    155 
    156 // Write Fst magic number and Fst header.
    157 bool FstHeader::Write(ostream &strm, const string &source) const {
    158   WriteType(strm, kFstMagicNumber);
    159   WriteType(strm, fsttype_);
    160   WriteType(strm, arctype_);
    161   WriteType(strm, version_);
    162   WriteType(strm, flags_);
    163   WriteType(strm, properties_);
    164   WriteType(strm, start_);
    165   WriteType(strm, numstates_);
    166   WriteType(strm, numarcs_);
    167   return true;
    168 }
    169 
    170 FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr,
    171                                const SymbolTable* isym, const SymbolTable* osym)
    172   : source(src), header(hdr), isymbols(isym), osymbols(osym) {
    173   mode = ReadMode(FLAGS_fst_read_mode);
    174 }
    175 
    176 FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym,
    177                                const SymbolTable* osym)
    178   : source(src), header(0), isymbols(isym), osymbols(osym) {
    179   mode = ReadMode(FLAGS_fst_read_mode);
    180 }
    181 
    182 FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
    183   if (mode == "read") {
    184     return READ;
    185   }
    186   if (mode == "map") {
    187     return MAP;
    188   }
    189   LOG(ERROR) << "Unknown file read mode " << mode;
    190   return READ;
    191 }
    192 
    193 }  // namespace fst
    194