Home | History | Annotate | Download | only in fst
      1 
      2 // Licensed under the Apache License, Version 2.0 (the "License");
      3 // you may not use this file except in compliance with the License.
      4 // You may obtain a copy of the License at
      5 //
      6 //     http://www.apache.org/licenses/LICENSE-2.0
      7 //
      8 // Unless required by applicable law or agreed to in writing, software
      9 // distributed under the License is distributed on an "AS IS" BASIS,
     10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 // See the License for the specific language governing permissions and
     12 // limitations under the License.
     13 //
     14 // Copyright 2005-2010 Google, Inc.
     15 // Author: sorenj (at) google.com (Jeffrey Sorensen)
     16 
     17 #ifndef FST_LIB_SYMBOL_TABLE_OPS_H_
     18 #define FST_LIB_SYMBOL_TABLE_OPS_H_
     19 
     20 #include <vector>
     21 using std::vector;
     22 #include <string>
     23 #include <tr1/unordered_set>
     24 using std::tr1::unordered_set;
     25 using std::tr1::unordered_multiset;
     26 
     27 
     28 #include <fst/fst.h>
     29 #include <fst/symbol-table.h>
     30 
     31 
     32 namespace fst {
     33 
     34 // Returns a minimal symbol table containing only symbols referenced by the
     35 // passed fst.  Symbols preserve their original numbering, so fst does not
     36 // require relabeling.
     37 template<class Arc>
     38 SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
     39                               bool input) {
     40   unordered_set<typename Arc::Label> seen;
     41   seen.insert(0);  // Always keep epslion
     42   StateIterator<Fst<Arc> > siter(fst);
     43   for (; !siter.Done(); siter.Next()) {
     44     ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
     45     for (; !aiter.Done(); aiter.Next()) {
     46       typename Arc::Label sym = (input) ? aiter.Value().ilabel :
     47                                           aiter.Value().olabel;
     48       seen.insert(sym);
     49     }
     50   }
     51   SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned");
     52   for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) {
     53     typename Arc::Label label = stiter.Value();
     54     if (seen.find(label) != seen.end()) {
     55       pruned->AddSymbol(stiter.Symbol(), stiter.Value());
     56     }
     57   }
     58   return pruned;
     59 }
     60 
     61 // Relabels a symbol table to make it a contiguous mapping.
     62 SymbolTable *CompactSymbolTable(const SymbolTable &syms);
     63 
     64 // Merges two SymbolTables: all symbols from left are merged into right
     65 // with the same ids.  Symbols in right whose ids conflict with those in
     66 // left are reassigned to the value assigned by the left SymbolTable.
     67 // The returned symbol table will never modify symbol assignments from the left
     68 // side, but may do so on the right.  If right_relabel_output is non-NULL, it
     69 // will be assigned true if the symbols from the right table needed to be
     70 // reassigned.
     71 // A potential use case is to Compose two Fst's that have different symbol
     72 // tables.  You can reconcile them in the following way:
     73 //   VectorFst<Arc> a, b;
     74 //   bool relabel;
     75 //   SymbolTable *bnew = MergeSymbolTable(*a.OutputSymbols(),
     76 //                                        *b.InputSymbols(), &relabel);
     77 //   if (relabel) {
     78 //     Relabel(&b, bnew, NULL);
     79 //   }
     80 //   b.SetInputSymbols(bnew);
     81 //   delete bnew;
     82 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
     83                               bool *right_relabel_output = 0);
     84 
     85 // Reads the symbol table from any Fst::Read()able file, without loading the
     86 // corresponding Fst.  Returns NULL if the Fst does not contain a symbol table
     87 // or the symbol table cannot be read.
     88 SymbolTable *FstReadSymbols(const string &filename, bool input);
     89 
     90 }  // namespace fst
     91 #endif  // FST_LIB_SYMBOL_TABLE_OPS_H_
     92