1 // util.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley (at) google.com (Michael Riley) 17 // 18 // \file 19 // FST utility inline definitions. 20 21 #ifndef FST_LIB_UTIL_H__ 22 #define FST_LIB_UTIL_H__ 23 24 #include <tr1/unordered_map> 25 using std::tr1::unordered_map; 26 using std::tr1::unordered_multimap; 27 #include <tr1/unordered_set> 28 using std::tr1::unordered_set; 29 using std::tr1::unordered_multiset; 30 #include <list> 31 #include <map> 32 #include <set> 33 #include <sstream> 34 #include <string> 35 #include <vector> 36 using std::vector; 37 38 39 #include <fst/compat.h> 40 #include <fst/types.h> 41 42 #include <iostream> 43 #include <fstream> 44 #include <sstream> 45 46 // 47 // UTILITY FOR ERROR HANDLING 48 // 49 50 DECLARE_bool(fst_error_fatal); 51 52 #define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) 53 54 namespace fst { 55 56 // 57 // UTILITIES FOR TYPE I/O 58 // 59 60 // Read some types from an input stream. 61 62 // Generic case. 63 template <typename T> 64 inline istream &ReadType(istream &strm, T *t) { 65 return t->Read(strm); 66 } 67 68 // Fixed size, contiguous memory read. 69 #define READ_POD_TYPE(T) \ 70 inline istream &ReadType(istream &strm, T *t) { \ 71 return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ 72 } 73 74 READ_POD_TYPE(bool); 75 READ_POD_TYPE(char); 76 READ_POD_TYPE(signed char); 77 READ_POD_TYPE(unsigned char); 78 READ_POD_TYPE(short); 79 READ_POD_TYPE(unsigned short); 80 READ_POD_TYPE(int); 81 READ_POD_TYPE(unsigned int); 82 READ_POD_TYPE(long); 83 READ_POD_TYPE(unsigned long); 84 READ_POD_TYPE(long long); 85 READ_POD_TYPE(unsigned long long); 86 READ_POD_TYPE(float); 87 READ_POD_TYPE(double); 88 89 // String case. 90 inline istream &ReadType(istream &strm, string *s) { 91 s->clear(); 92 int32 ns = 0; 93 strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); 94 for (int i = 0; i < ns; ++i) { 95 char c; 96 strm.read(&c, 1); 97 *s += c; 98 } 99 return strm; 100 } 101 102 // Pair case. 103 template <typename S, typename T> 104 inline istream &ReadType(istream &strm, pair<S, T> *p) { 105 ReadType(strm, &p->first); 106 ReadType(strm, &p->second); 107 return strm; 108 } 109 110 template <typename S, typename T> 111 inline istream &ReadType(istream &strm, pair<const S, T> *p) { 112 ReadType(strm, const_cast<S *>(&p->first)); 113 ReadType(strm, &p->second); 114 return strm; 115 } 116 117 // General case - no-op. 118 template <typename C> 119 void StlReserve(C *c, int64 n) {} 120 121 // Specialization for vectors. 122 template <typename S, typename T> 123 void StlReserve(vector<S, T> *c, int64 n) { 124 c->reserve(n); 125 } 126 127 // STL sequence container. 128 #define READ_STL_SEQ_TYPE(C) \ 129 template <typename S, typename T> \ 130 inline istream &ReadType(istream &strm, C<S, T> *c) { \ 131 c->clear(); \ 132 int64 n = 0; \ 133 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 134 StlReserve(c, n); \ 135 for (ssize_t i = 0; i < n; ++i) { \ 136 typename C<S, T>::value_type value; \ 137 ReadType(strm, &value); \ 138 c->insert(c->end(), value); \ 139 } \ 140 return strm; \ 141 } 142 143 READ_STL_SEQ_TYPE(vector); 144 READ_STL_SEQ_TYPE(list); 145 146 // STL associative container. 147 #define READ_STL_ASSOC_TYPE(C) \ 148 template <typename S, typename T, typename U> \ 149 inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ 150 c->clear(); \ 151 int64 n = 0; \ 152 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 153 for (ssize_t i = 0; i < n; ++i) { \ 154 typename C<S, T, U>::value_type value; \ 155 ReadType(strm, &value); \ 156 c->insert(value); \ 157 } \ 158 return strm; \ 159 } 160 161 READ_STL_ASSOC_TYPE(set); 162 READ_STL_ASSOC_TYPE(unordered_set); 163 READ_STL_ASSOC_TYPE(map); 164 READ_STL_ASSOC_TYPE(unordered_map); 165 166 // Write some types to an output stream. 167 168 // Generic case. 169 template <typename T> 170 inline ostream &WriteType(ostream &strm, const T t) { 171 t.Write(strm); 172 return strm; 173 } 174 175 // Fixed size, contiguous memory write. 176 #define WRITE_POD_TYPE(T) \ 177 inline ostream &WriteType(ostream &strm, const T t) { \ 178 return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ 179 } 180 181 WRITE_POD_TYPE(bool); 182 WRITE_POD_TYPE(char); 183 WRITE_POD_TYPE(signed char); 184 WRITE_POD_TYPE(unsigned char); 185 WRITE_POD_TYPE(short); 186 WRITE_POD_TYPE(unsigned short); 187 WRITE_POD_TYPE(int); 188 WRITE_POD_TYPE(unsigned int); 189 WRITE_POD_TYPE(long); 190 WRITE_POD_TYPE(unsigned long); 191 WRITE_POD_TYPE(long long); 192 WRITE_POD_TYPE(unsigned long long); 193 WRITE_POD_TYPE(float); 194 WRITE_POD_TYPE(double); 195 196 // String case. 197 inline ostream &WriteType(ostream &strm, const string &s) { 198 int32 ns = s.size(); 199 strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); 200 return strm.write(s.data(), ns); 201 } 202 203 // Pair case. 204 template <typename S, typename T> 205 inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { 206 WriteType(strm, p.first); 207 WriteType(strm, p.second); 208 return strm; 209 } 210 211 // STL sequence container. 212 #define WRITE_STL_SEQ_TYPE(C) \ 213 template <typename S, typename T> \ 214 inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ 215 int64 n = c.size(); \ 216 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 217 for (typename C<S, T>::const_iterator it = c.begin(); \ 218 it != c.end(); ++it) \ 219 WriteType(strm, *it); \ 220 return strm; \ 221 } 222 223 WRITE_STL_SEQ_TYPE(vector); 224 WRITE_STL_SEQ_TYPE(list); 225 226 // STL associative container. 227 #define WRITE_STL_ASSOC_TYPE(C) \ 228 template <typename S, typename T, typename U> \ 229 inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ 230 int64 n = c.size(); \ 231 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 232 for (typename C<S, T, U>::const_iterator it = c.begin(); \ 233 it != c.end(); ++it) \ 234 WriteType(strm, *it); \ 235 return strm; \ 236 } 237 238 WRITE_STL_ASSOC_TYPE(set); 239 WRITE_STL_ASSOC_TYPE(unordered_set); 240 WRITE_STL_ASSOC_TYPE(map); 241 WRITE_STL_ASSOC_TYPE(unordered_map); 242 243 // Utilities for converting between int64 or Weight and string. 244 245 int64 StrToInt64(const string &s, const string &src, size_t nline, 246 bool allow_negative, bool *error = 0); 247 248 template <typename Weight> 249 Weight StrToWeight(const string &s, const string &src, size_t nline) { 250 Weight w; 251 istringstream strm(s); 252 strm >> w; 253 if (!strm) { 254 FSTERROR() << "StrToWeight: Bad weight = \"" << s 255 << "\", source = " << src << ", line = " << nline; 256 return Weight::NoWeight(); 257 } 258 return w; 259 } 260 261 void Int64ToStr(int64 n, string *s); 262 263 template <typename Weight> 264 void WeightToStr(Weight w, string *s) { 265 ostringstream strm; 266 strm.precision(9); 267 strm << w; 268 s->append(strm.str().data(), strm.str().size()); 269 } 270 271 // Utilities for reading/writing integer pairs (typically labels) 272 273 // Returns true on success 274 template <typename I> 275 bool ReadIntPairs(const string& filename, 276 vector<pair<I, I> >* pairs, 277 bool allow_negative = false) { 278 ifstream strm(filename.c_str()); 279 280 if (!strm) { 281 LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename; 282 return false; 283 } 284 285 const int kLineLen = 8096; 286 char line[kLineLen]; 287 size_t nline = 0; 288 289 pairs->clear(); 290 while (strm.getline(line, kLineLen)) { 291 ++nline; 292 vector<char *> col; 293 SplitToVector(line, "\n\t ", &col, true); 294 // empty line or comment? 295 if (col.size() == 0 || col[0][0] == '\0' || col[0][0] == '#') 296 continue; 297 if (col.size() != 2) { 298 LOG(ERROR) << "ReadIntPairs: Bad number of columns, " 299 << "file = " << filename << ", line = " << nline; 300 return false; 301 } 302 303 bool err; 304 I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err); 305 if (err) return false; 306 I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err); 307 if (err) return false; 308 pairs->push_back(make_pair(i1, i2)); 309 } 310 return true; 311 } 312 313 // Returns true on success 314 template <typename I> 315 bool WriteIntPairs(const string& filename, 316 const vector<pair<I, I> >& pairs) { 317 ostream *strm = &cout; 318 if (!filename.empty()) { 319 strm = new ofstream(filename.c_str()); 320 if (!*strm) { 321 LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename; 322 return false; 323 } 324 } 325 326 for (ssize_t n = 0; n < pairs.size(); ++n) 327 *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; 328 329 if (!*strm) { 330 LOG(ERROR) << "WriteIntPairs: Write failed: " 331 << (filename.empty() ? "standard output" : filename); 332 return false; 333 } 334 if (strm != &cout) 335 delete strm; 336 return true; 337 } 338 339 // Utilities for reading/writing label pairs 340 341 template <typename Label> 342 bool ReadLabelPairs(const string& filename, 343 vector<pair<Label, Label> >* pairs, 344 bool allow_negative = false) { 345 return ReadIntPairs(filename, pairs, allow_negative); 346 } 347 348 template <typename Label> 349 bool WriteLabelPairs(const string& filename, 350 vector<pair<Label, Label> >& pairs) { 351 return WriteIntPairs(filename, pairs); 352 } 353 354 // Utilities for converting a type name to a legal C symbol. 355 356 void ConvertToLegalCSymbol(string *s); 357 358 359 // 360 // UTILITIES FOR STREAM I/O 361 // 362 363 bool AlignInput(istream &strm); 364 bool AlignOutput(ostream &strm); 365 366 // 367 // UTILITIES FOR PROTOCOL BUFFER I/O 368 // 369 370 371 // An associative container for which testing membership is 372 // faster than an STL set if members are restricted to an interval 373 // that excludes most non-members. A 'Key' must have ==, !=, and < defined. 374 // Element 'NoKey' should be a key that marks an uninitialized key and 375 // is otherwise unused. 'Find()' returns an STL const_iterator to the match 376 // found, otherwise it equals 'End()'. 377 template <class Key, Key NoKey> 378 class CompactSet { 379 public: 380 typedef typename set<Key>::const_iterator const_iterator; 381 382 CompactSet() 383 : min_key_(NoKey), 384 max_key_(NoKey) { } 385 386 CompactSet(const CompactSet<Key, NoKey> &compact_set) 387 : set_(compact_set.set_), 388 min_key_(compact_set.min_key_), 389 max_key_(compact_set.max_key_) { } 390 391 void Insert(Key key) { 392 set_.insert(key); 393 if (min_key_ == NoKey || key < min_key_) 394 min_key_ = key; 395 if (max_key_ == NoKey || max_key_ < key) 396 max_key_ = key; 397 } 398 399 void Erase(Key key) { 400 set_.erase(key); 401 if (set_.empty()) { 402 min_key_ = max_key_ = NoKey; 403 } else if (key == min_key_) { 404 ++min_key_; 405 } else if (key == max_key_) { 406 --max_key_; 407 } 408 } 409 410 void Clear() { 411 set_.clear(); 412 min_key_ = max_key_ = NoKey; 413 } 414 415 const_iterator Find(Key key) const { 416 if (min_key_ == NoKey || 417 key < min_key_ || max_key_ < key) 418 return set_.end(); 419 else 420 return set_.find(key); 421 } 422 423 bool Member(Key key) const { 424 if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { 425 return false; // out of range 426 } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { 427 return true; // dense range 428 } else { 429 return set_.find(key) != set_.end(); 430 } 431 } 432 433 const_iterator Begin() const { return set_.begin(); } 434 435 const_iterator End() const { return set_.end(); } 436 437 // All stored keys are greater than or equal to this value. 438 Key LowerBound() const { return min_key_; } 439 440 // All stored keys are less than or equal to this value. 441 Key UpperBound() const { return max_key_; } 442 443 private: 444 set<Key> set_; 445 Key min_key_; 446 Key max_key_; 447 448 void operator=(const CompactSet<Key, NoKey> &); //disallow 449 }; 450 451 } // namespace fst 452 453 #endif // FST_LIB_UTIL_H__ 454