1 // symbol-table.cc 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // 16 // \file 17 // Classes to provide symbol-to-integer and integer-to-symbol mappings. 18 19 #include "fst/lib/symbol-table.h" 20 #include "fst/lib/util.h" 21 22 #include <string.h> 23 24 DEFINE_bool(fst_compat_symbols, true, 25 "Require symbol tables to match when appropriate"); 26 27 namespace fst { 28 29 // Maximum line length in textual symbols file. 30 const int kLineLen = 8096; 31 32 // Identifies stream data as a symbol table (and its endianity) 33 static const int32 kSymbolTableMagicNumber = 2125658996; 34 35 SymbolTableImpl* SymbolTableImpl::ReadText(const string &filename) { 36 ifstream strm(filename.c_str()); 37 if (!strm) { 38 LOG(ERROR) << "SymbolTable::ReadText: Can't open symbol file: " 39 << filename; 40 return 0; 41 } 42 43 SymbolTableImpl* impl = new SymbolTableImpl(filename); 44 45 int64 nline = 0; 46 char line[kLineLen]; 47 while (strm.getline(line, kLineLen)) { 48 ++nline; 49 vector<char *> col; 50 SplitToVector(line, "\n\t ", &col, true); 51 if (col.size() == 0) // empty line 52 continue; 53 if (col.size() != 2) { 54 LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (skipping), " 55 << "file = " << filename << ", line = " << nline; 56 continue; 57 } 58 const char *symbol = col[0]; 59 const char *value = col[1]; 60 char *p; 61 int64 key = strtoll(value, &p, 10); 62 if (p < value + strlen(value) || key < 0) { 63 LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \"" 64 << value << "\" (skipping), " 65 << "file = " << filename << ", line = " << nline; 66 continue; 67 } 68 impl->AddSymbol(symbol, key); 69 } 70 71 return impl; 72 } 73 74 void SymbolTableImpl::RecomputeCheckSum() const { 75 check_sum_.Reset(); 76 for (size_t i = 0; i < symbols_.size(); ++i) { 77 check_sum_.Update(symbols_[i], strlen(symbols_[i])+1); 78 } 79 check_sum_finalized_ = true; 80 } 81 82 int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) { 83 hash_map<string, int64>::const_iterator it = 84 symbol_map_.find(symbol); 85 if (it == symbol_map_.end()) { // only add if not in table 86 check_sum_finalized_ = false; 87 88 char *csymbol = new char[symbol.size() + 1]; 89 strcpy(csymbol, symbol.c_str()); 90 symbols_.push_back(csymbol); 91 key_map_[key] = csymbol; 92 symbol_map_[csymbol] = key; 93 94 if (key >= available_key_) { 95 available_key_ = key + 1; 96 } 97 } 98 99 return key; 100 } 101 102 SymbolTableImpl* SymbolTableImpl::Read(istream &strm, 103 const string &source) { 104 int32 magic_number = 0; 105 ReadType(strm, &magic_number); 106 if (magic_number != kSymbolTableMagicNumber) { 107 LOG(ERROR) << "SymbolTable::Read: read failed"; 108 return 0; 109 } 110 string name; 111 ReadType(strm, &name); 112 SymbolTableImpl* impl = new SymbolTableImpl(name); 113 ReadType(strm, &impl->available_key_); 114 int64 size; 115 ReadType(strm, &size); 116 string symbol; 117 int64 key = 0; 118 for (size_t i = 0; i < size; ++i) { 119 ReadType(strm, &symbol); 120 ReadType(strm, &key); 121 impl->AddSymbol(symbol, key); 122 } 123 if (!strm) 124 LOG(ERROR) << "SymbolTable::Read: read failed"; 125 return impl; 126 } 127 128 bool SymbolTableImpl::Write(ostream &strm) const { 129 WriteType(strm, kSymbolTableMagicNumber); 130 WriteType(strm, name_); 131 WriteType(strm, available_key_); 132 int64 size = symbols_.size(); 133 WriteType(strm, size); 134 for (size_t i = 0; i < symbols_.size(); ++i) { 135 const string symbol = symbols_[i]; 136 WriteType(strm, symbol); 137 hash_map<string, int64>::const_iterator it = symbol_map_.find(symbol); 138 WriteType(strm, it->second); 139 } 140 strm.flush(); 141 if (!strm) 142 LOG(ERROR) << "SymbolTable::Write: write failed"; 143 return strm; 144 } 145 146 bool SymbolTableImpl::WriteText(ostream &strm) const { 147 for (size_t i = 0; i < symbols_.size(); ++i) { 148 char line[kLineLen]; 149 snprintf(line, kLineLen, "%s\t%lld\n", symbols_[i], Find(symbols_[i])); 150 strm.write(line, strlen(line)); 151 } 152 strm.flush(); 153 if (!strm) 154 LOG(ERROR) << "SymbolTable::WriteText: write failed"; 155 return strm; 156 } 157 158 } // namespace fst 159