Home | History | Annotate | Download | only in lib
      1 
      2 // Licensed under the Apache License, Version 2.0 (the "License");
      3 // you may not use this file except in compliance with the License.
      4 // You may obtain a copy of the License at
      5 //
      6 //     http://www.apache.org/licenses/LICENSE-2.0
      7 //
      8 // Unless required by applicable law or agreed to in writing, software
      9 // distributed under the License is distributed on an "AS IS" BASIS,
     10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 // See the License for the specific language governing permissions and
     12 // limitations under the License.
     13 //
     14 // Copyright 2005-2010 Google, Inc.
     15 // Author: sorenj (at) google.com (Jeffrey Sorensen)
     16 
     17 #include <fst/symbol-table-ops.h>
     18 
     19 namespace fst {
     20 
     21 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
     22                               bool *right_relabel_output) {
     23   // MergeSymbolTable detects several special cases.  It will return a reference
     24   // copied version of SymbolTable of left or right if either symbol table is
     25   // a superset of the other.
     26   SymbolTable *merged = new SymbolTable("merge_" + left.Name() + "_" +
     27                                         right.Name());
     28   // copy everything from the left symbol table
     29   bool left_has_all = true, right_has_all = true, relabel = false;
     30   SymbolTableIterator liter(left);
     31   for (; !liter.Done(); liter.Next()) {
     32     merged->AddSymbol(liter.Symbol(), liter.Value());
     33     if (right_has_all) {
     34       int64 key = right.Find(liter.Symbol());
     35       if (key == -1) {
     36         right_has_all = false;
     37       } else if (!relabel && key != liter.Value()) {
     38         relabel = true;
     39       }
     40     }
     41   }
     42   if (right_has_all) {
     43     delete merged;
     44     if (right_relabel_output != NULL) {
     45       *right_relabel_output = relabel;
     46     }
     47     return right.Copy();
     48   }
     49   // add all symbols we can from right symbol table
     50   vector<string> conflicts;
     51   SymbolTableIterator riter(right);
     52   for (; !riter.Done(); riter.Next()) {
     53     int64 key = merged->Find(riter.Symbol());
     54     if (key != -1) {
     55       // Symbol already exists, maybe with different value
     56       if (key != riter.Value()) {
     57         relabel = true;
     58       }
     59       continue;
     60     }
     61     // Symbol doesn't exist from left
     62     left_has_all = false;
     63     if (!merged->Find(riter.Value()).empty()) {
     64       // we can't add this where we want to, add it later, in order
     65       conflicts.push_back(riter.Symbol());
     66       continue;
     67     }
     68     // there is a hole and we can add this symbol with its id
     69     merged->AddSymbol(riter.Symbol(), riter.Value());
     70   }
     71   if (right_relabel_output != NULL) {
     72     *right_relabel_output = relabel;
     73   }
     74   if (left_has_all) {
     75     delete merged;
     76     return left.Copy();
     77   }
     78   // Add all symbols that conflicted, in order
     79   for (int i= 0; i < conflicts.size(); ++i) {
     80     merged->AddSymbol(conflicts[i]);
     81   }
     82   return merged;
     83 }
     84 
     85 SymbolTable *CompactSymbolTable(const SymbolTable &syms) {
     86   map<int, string> sorted;
     87   SymbolTableIterator stiter(syms);
     88   for (; !stiter.Done(); stiter.Next()) {
     89     sorted[stiter.Value()] = stiter.Symbol();
     90   }
     91   SymbolTable *compact = new SymbolTable(syms.Name() + "_compact");
     92   uint64 newkey = 0;
     93   for (map<int, string>::const_iterator si = sorted.begin();
     94        si != sorted.end(); ++si) {
     95     compact->AddSymbol(si->second, newkey++);
     96   }
     97   return compact;
     98 }
     99 
    100 SymbolTable *FstReadSymbols(const string &filename, bool input_symbols) {
    101   ifstream in(filename.c_str(), ifstream::in | ifstream::binary);
    102   if (!in) {
    103     LOG(ERROR) << "FstReadSymbols: Can't open file " << filename;
    104     return NULL;
    105   }
    106   FstHeader hdr;
    107   if (!hdr.Read(in, filename)) {
    108     LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << filename;
    109     return NULL;
    110   }
    111   if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
    112     SymbolTable *isymbols = SymbolTable::Read(in, filename);
    113     if (isymbols == NULL) {
    114       LOG(ERROR) << "FstReadSymbols: Could not read input symbols from "
    115                  << filename;
    116       return NULL;
    117     }
    118     if (input_symbols) {
    119       return isymbols;
    120     }
    121     delete isymbols;
    122   }
    123   if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
    124     SymbolTable *osymbols = SymbolTable::Read(in, filename);
    125     if (osymbols == NULL) {
    126       LOG(ERROR) << "FstReadSymbols: Could not read output symbols from "
    127                  << filename;
    128       return NULL;
    129     }
    130     if (!input_symbols) {
    131       return osymbols;
    132     }
    133     delete osymbols;
    134   }
    135   LOG(ERROR) << "FstReadSymbols: The file " << filename
    136              << " doesn't contain the requested symbols";
    137   return NULL;
    138 }
    139 
    140 }  // namespace fst
    141