1 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 // 14 // Copyright 2005-2010 Google, Inc. 15 // Author: sorenj (at) google.com (Jeffrey Sorensen) 16 17 #ifndef FST_LIB_SYMBOL_TABLE_OPS_H_ 18 #define FST_LIB_SYMBOL_TABLE_OPS_H_ 19 20 #include <vector> 21 using std::vector; 22 #include <string> 23 #include <tr1/unordered_set> 24 using std::tr1::unordered_set; 25 using std::tr1::unordered_multiset; 26 27 28 #include <fst/fst.h> 29 #include <fst/symbol-table.h> 30 31 32 namespace fst { 33 34 // Returns a minimal symbol table containing only symbols referenced by the 35 // passed fst. Symbols preserve their original numbering, so fst does not 36 // require relabeling. 37 template<class Arc> 38 SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms, 39 bool input) { 40 unordered_set<typename Arc::Label> seen; 41 seen.insert(0); // Always keep epslion 42 StateIterator<Fst<Arc> > siter(fst); 43 for (; !siter.Done(); siter.Next()) { 44 ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); 45 for (; !aiter.Done(); aiter.Next()) { 46 typename Arc::Label sym = (input) ? aiter.Value().ilabel : 47 aiter.Value().olabel; 48 seen.insert(sym); 49 } 50 } 51 SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned"); 52 for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) { 53 typename Arc::Label label = stiter.Value(); 54 if (seen.find(label) != seen.end()) { 55 pruned->AddSymbol(stiter.Symbol(), stiter.Value()); 56 } 57 } 58 return pruned; 59 } 60 61 // Relabels a symbol table to make it a contiguous mapping. 62 SymbolTable *CompactSymbolTable(const SymbolTable &syms); 63 64 // Merges two SymbolTables, all symbols from left will be merged into right 65 // with the same ids. Symbols in right that have conflicting ids with those 66 // in left will be assigned to value assigned from the left SymbolTable. 67 // The returned symbol table will never modify symbol assignments from the left 68 // side, but may do so on the right. If right_relabel_output is non-NULL, it 69 // will be assigned true if the symbols from the right table needed to be 70 // reassigned. 71 // A potential use case is to Compose two Fst's that have different symbol 72 // tables. You can reconcile them in the following way: 73 // Fst<Arc> a, b; 74 // bool relabel; 75 // SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(), 76 // b.InputSymbols(), &relabel); 77 // if (relabel) { 78 // Relabel(b, bnew, NULL); 79 // } 80 // b.SetInputSymbols(bnew); 81 // delete bnew; 82 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, 83 bool *right_relabel_output = 0); 84 85 // Read the symbol table from any Fst::Read()able file, without loading the 86 // corresponding Fst. Returns NULL if the Fst does not contain a symbol table 87 // or the symbol table cannot be read. 88 SymbolTable *FstReadSymbols(const string &filename, bool input); 89 90 } // namespace fst 91 #endif // FST_LIB_SYMBOL_TABLE_OPS_H_ 92