Home | History | Annotate | Download | only in lib
      1 // symbol-table.cc
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 //
     16 // \file
     17 // Classes to provide symbol-to-integer and integer-to-symbol mappings.
     18 
     19 #include "fst/lib/symbol-table.h"
     20 #include "fst/lib/util.h"
     21 
     22 #include <string.h>
     23 
// Defines the boolean flag `fst_compat_symbols` (default: true). Per its help
// text, it requires symbol tables to match when appropriate. The flag is not
// read anywhere in the code visible in this file.
DEFINE_bool(fst_compat_symbols, true,
            "Require symbol tables to match when appropriate");
     26 
     27 namespace fst {
     28 
// Maximum line length in textual symbols file. Used as the size, in bytes, of
// the line buffer in ReadText(), so at most kLineLen - 1 characters of a line
// can be stored (one byte is needed for the terminating NUL).
const int kLineLen = 8096;

// Identifies stream data as a symbol table (and its endianness). It is the
// first value emitted by Write() and the first value checked by Read().
static const int32 kSymbolTableMagicNumber = 2125658996;
     34 
     35 SymbolTableImpl* SymbolTableImpl::ReadText(const string &filename) {
     36   ifstream strm(filename.c_str());
     37   if (!strm) {
     38     LOG(ERROR) << "SymbolTable::ReadText: Can't open symbol file: "
     39                << filename;
     40     return 0;
     41   }
     42 
     43   SymbolTableImpl* impl = new SymbolTableImpl(filename);
     44 
     45   int64 nline = 0;
     46   char line[kLineLen];
     47   while (strm.getline(line, kLineLen)) {
     48     ++nline;
     49     vector<char *> col;
     50     SplitToVector(line, "\n\t ", &col, true);
     51     if (col.size() == 0)  // empty line
     52       continue;
     53     if (col.size() != 2) {
     54       LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (skipping), "
     55                  << "file = " << filename << ", line = " << nline;
     56       continue;
     57     }
     58     const char *symbol = col[0];
     59     const char *value = col[1];
     60     char *p;
     61     int64 key = strtoll(value, &p, 10);
     62     if (p < value + strlen(value) || key < 0) {
     63       LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \""
     64                  << value << "\" (skipping), "
     65                  << "file = " << filename << ", line = " << nline;
     66       continue;
     67     }
     68     impl->AddSymbol(symbol, key);
     69   }
     70 
     71   return impl;
     72 }
     73 
     74 void SymbolTableImpl::RecomputeCheckSum() const {
     75   check_sum_.Reset();
     76   for (size_t i = 0; i < symbols_.size(); ++i) {
     77     check_sum_.Update(symbols_[i], strlen(symbols_[i])+1);
     78   }
     79   check_sum_finalized_ = true;
     80 }
     81 
     82 int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) {
     83   std::unordered_map<string, int64>::const_iterator it =
     84     symbol_map_.find(symbol);
     85   if (it == symbol_map_.end()) {  // only add if not in table
     86     check_sum_finalized_ = false;
     87 
     88     char *csymbol = new char[symbol.size() + 1];
     89     strcpy(csymbol, symbol.c_str());
     90     symbols_.push_back(csymbol);
     91     key_map_[key] = csymbol;
     92     symbol_map_[csymbol] = key;
     93 
     94     if (key >= available_key_) {
     95       available_key_ = key + 1;
     96     }
     97   }
     98 
     99   return key;
    100 }
    101 
    102 SymbolTableImpl* SymbolTableImpl::Read(istream &strm,
    103                                        const string &source) {
    104   int32 magic_number = 0;
    105   ReadType(strm, &magic_number);
    106   if (magic_number != kSymbolTableMagicNumber) {
    107     LOG(ERROR) << "SymbolTable::Read: read failed";
    108     return 0;
    109   }
    110   string name;
    111   ReadType(strm, &name);
    112   SymbolTableImpl* impl = new SymbolTableImpl(name);
    113   ReadType(strm, &impl->available_key_);
    114   int64 size;
    115   ReadType(strm, &size);
    116   string symbol;
    117   int64 key = 0;
    118   for (size_t i = 0; i < size; ++i) {
    119     ReadType(strm, &symbol);
    120     ReadType(strm, &key);
    121     impl->AddSymbol(symbol, key);
    122   }
    123   if (!strm)
    124     LOG(ERROR) << "SymbolTable::Read: read failed";
    125   return impl;
    126 }
    127 
    128 bool SymbolTableImpl::Write(ostream &strm) const {
    129   WriteType(strm, kSymbolTableMagicNumber);
    130   WriteType(strm, name_);
    131   WriteType(strm, available_key_);
    132   int64 size = symbols_.size();
    133   WriteType(strm, size);
    134   for (size_t i = 0; i < symbols_.size(); ++i) {
    135     const string symbol = symbols_[i];
    136     WriteType(strm, symbol);
    137     std::unordered_map<string, int64>::const_iterator it = symbol_map_.find(symbol);
    138     WriteType(strm, it->second);
    139   }
    140   strm.flush();
    141   if (!strm) {
    142     LOG(ERROR) << "SymbolTable::Write: write failed";
    143     return false;
    144   }
    145   return true;
    146 }
    147 
    148 bool SymbolTableImpl::WriteText(ostream &strm) const {
    149   for (size_t i = 0; i < symbols_.size(); ++i) {
    150     char line[kLineLen];
    151     snprintf(line, kLineLen, "%s\t%lld\n", symbols_[i], Find(symbols_[i]));
    152     strm.write(line, strlen(line));
    153   }
    154   strm.flush();
    155   if (!strm) {
    156     LOG(ERROR) << "SymbolTable::WriteText: write failed";
    157     return false;
    158   }
    159   return true;
    160 }
    161 
    162 }  // namespace fst
    163