Home | History | Annotate | Download | only in fst
      1 // util.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // FST utility inline definitions.
     20 
     21 #ifndef FST_LIB_UTIL_H__
     22 #define FST_LIB_UTIL_H__
     23 
     24 #include <tr1/unordered_map>
     25 using std::tr1::unordered_map;
     26 using std::tr1::unordered_multimap;
     27 #include <tr1/unordered_set>
     28 using std::tr1::unordered_set;
     29 using std::tr1::unordered_multiset;
     30 #include <list>
     31 #include <map>
     32 #include <set>
     33 #include <sstream>
     34 #include <string>
     35 #include <vector>
     36 using std::vector;
     37 
     38 
     39 #include <fst/compat.h>
     40 #include <fst/types.h>
     41 
     42 #include <iostream>
     43 #include <fstream>
     44 #include <sstream>
     45 
     46 //
     47 // UTILITY FOR ERROR HANDLING
     48 //
     49 
     50 DECLARE_bool(fst_error_fatal);
     51 
     52 #define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
     53 
     54 namespace fst {
     55 
     56 //
     57 // UTILITIES FOR TYPE I/O
     58 //
     59 
     60 // Read some types from an input stream.
     61 
     62 // Generic case.
     63 template <typename T>
     64 inline istream &ReadType(istream &strm, T *t) {
     65   return t->Read(strm);
     66 }
     67 
     68 // Fixed size, contiguous memory read.
     69 #define READ_POD_TYPE(T)                                    \
     70 inline istream &ReadType(istream &strm, T *t) {             \
     71   return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \
     72 }
     73 
     74 READ_POD_TYPE(bool);
     75 READ_POD_TYPE(char);
     76 READ_POD_TYPE(signed char);
     77 READ_POD_TYPE(unsigned char);
     78 READ_POD_TYPE(short);
     79 READ_POD_TYPE(unsigned short);
     80 READ_POD_TYPE(int);
     81 READ_POD_TYPE(unsigned int);
     82 READ_POD_TYPE(long);
     83 READ_POD_TYPE(unsigned long);
     84 READ_POD_TYPE(long long);
     85 READ_POD_TYPE(unsigned long long);
     86 READ_POD_TYPE(float);
     87 READ_POD_TYPE(double);
     88 
     89 // String case.
     90 inline istream &ReadType(istream &strm, string *s) {
     91   s->clear();
     92   int32 ns = 0;
     93   strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
     94   for (int i = 0; i < ns; ++i) {
     95     char c;
     96     strm.read(&c, 1);
     97     *s += c;
     98   }
     99   return strm;
    100 }
    101 
    102 // Pair case.
    103 template <typename S, typename T>
    104 inline istream &ReadType(istream &strm, pair<S, T> *p) {
    105   ReadType(strm, &p->first);
    106   ReadType(strm, &p->second);
    107   return strm;
    108 }
    109 
    110 template <typename S, typename T>
    111 inline istream &ReadType(istream &strm, pair<const S, T> *p) {
    112   ReadType(strm, const_cast<S *>(&p->first));
    113   ReadType(strm, &p->second);
    114   return strm;
    115 }
    116 
    117 // General case - no-op.
    118 template <typename C>
    119 void StlReserve(C *c, int64 n) {}
    120 
    121 // Specialization for vectors.
    122 template <typename S, typename T>
    123 void StlReserve(vector<S, T> *c, int64 n) {
    124   c->reserve(n);
    125 }
    126 
    127 // STL sequence container.
    128 #define READ_STL_SEQ_TYPE(C)                             \
    129 template <typename S, typename T>                        \
    130 inline istream &ReadType(istream &strm, C<S, T> *c) {    \
    131   c->clear();                                            \
    132   int64 n = 0;                                           \
    133   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
    134   StlReserve(c, n);                                      \
    135   for (ssize_t i = 0; i < n; ++i) {                      \
    136     typename C<S, T>::value_type value;                  \
    137     ReadType(strm, &value);                              \
    138     c->insert(c->end(), value);                          \
    139   }                                                      \
    140   return strm;                                           \
    141 }
    142 
    143 READ_STL_SEQ_TYPE(vector);
    144 READ_STL_SEQ_TYPE(list);
    145 
    146 // STL associative container.
    147 #define READ_STL_ASSOC_TYPE(C)                           \
    148 template <typename S, typename T, typename U>            \
    149 inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
    150   c->clear();                                            \
    151   int64 n = 0;                                           \
    152   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
    153   for (ssize_t i = 0; i < n; ++i) {                      \
    154     typename C<S, T, U>::value_type value;               \
    155     ReadType(strm, &value);                              \
    156     c->insert(value);                                    \
    157   }                                                      \
    158   return strm;                                           \
    159 }
    160 
    161 READ_STL_ASSOC_TYPE(set);
    162 READ_STL_ASSOC_TYPE(unordered_set);
    163 READ_STL_ASSOC_TYPE(map);
    164 READ_STL_ASSOC_TYPE(unordered_map);
    165 
    166 // Write some types to an output stream.
    167 
    168 // Generic case.
    169 template <typename T>
    170 inline ostream &WriteType(ostream &strm, const T t) {
    171   t.Write(strm);
    172   return strm;
    173 }
    174 
    175 // Fixed size, contiguous memory write.
    176 #define WRITE_POD_TYPE(T)                                           \
    177 inline ostream &WriteType(ostream &strm, const T t) {               \
    178   return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \
    179 }
    180 
    181 WRITE_POD_TYPE(bool);
    182 WRITE_POD_TYPE(char);
    183 WRITE_POD_TYPE(signed char);
    184 WRITE_POD_TYPE(unsigned char);
    185 WRITE_POD_TYPE(short);
    186 WRITE_POD_TYPE(unsigned short);
    187 WRITE_POD_TYPE(int);
    188 WRITE_POD_TYPE(unsigned int);
    189 WRITE_POD_TYPE(long);
    190 WRITE_POD_TYPE(unsigned long);
    191 WRITE_POD_TYPE(long long);
    192 WRITE_POD_TYPE(unsigned long long);
    193 WRITE_POD_TYPE(float);
    194 WRITE_POD_TYPE(double);
    195 
    196 // String case.
    197 inline ostream &WriteType(ostream &strm, const string &s) {
    198   int32 ns = s.size();
    199   strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns));
    200   return strm.write(s.data(), ns);
    201 }
    202 
    203 // Pair case.
    204 template <typename S, typename T>
    205 inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
    206   WriteType(strm, p.first);
    207   WriteType(strm, p.second);
    208   return strm;
    209 }
    210 
    211 // STL sequence container.
    212 #define WRITE_STL_SEQ_TYPE(C)                                                \
    213 template <typename S, typename T>                                            \
    214 inline ostream &WriteType(ostream &strm, const C<S, T> &c) {                 \
    215   int64 n = c.size();                                                        \
    216   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
    217   for (typename C<S, T>::const_iterator it = c.begin();                      \
    218        it != c.end(); ++it)                                                  \
    219      WriteType(strm, *it);                                                   \
    220   return strm;                                                               \
    221 }
    222 
    223 WRITE_STL_SEQ_TYPE(vector);
    224 WRITE_STL_SEQ_TYPE(list);
    225 
    226 // STL associative container.
    227 #define WRITE_STL_ASSOC_TYPE(C)                                              \
    228 template <typename S, typename T, typename U>                                \
    229 inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) {              \
    230   int64 n = c.size();                                                        \
    231   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
    232   for (typename C<S, T, U>::const_iterator it = c.begin();                   \
    233        it != c.end(); ++it)                                                  \
    234      WriteType(strm, *it);                                                   \
    235   return strm;                                                               \
    236 }
    237 
    238 WRITE_STL_ASSOC_TYPE(set);
    239 WRITE_STL_ASSOC_TYPE(unordered_set);
    240 WRITE_STL_ASSOC_TYPE(map);
    241 WRITE_STL_ASSOC_TYPE(unordered_map);
    242 
    243 // Utilities for converting between int64 or Weight and string.
    244 
    245 int64 StrToInt64(const string &s, const string &src, size_t nline,
    246                  bool allow_negative, bool *error = 0);
    247 
    248 template <typename Weight>
    249 Weight StrToWeight(const string &s, const string &src, size_t nline) {
    250   Weight w;
    251   istringstream strm(s);
    252   strm >> w;
    253   if (!strm) {
    254     FSTERROR() << "StrToWeight: Bad weight = \"" << s
    255                << "\", source = " << src << ", line = " << nline;
    256     return Weight::NoWeight();
    257   }
    258   return w;
    259 }
    260 
    261 void Int64ToStr(int64 n, string *s);
    262 
    263 template <typename Weight>
    264 void WeightToStr(Weight w, string *s) {
    265   ostringstream strm;
    266   strm.precision(9);
    267   strm << w;
    268   s->append(strm.str().data(), strm.str().size());
    269 }
    270 
    271 // Utilities for reading/writing integer pairs (typically labels)
    272 
    273 // Returns true on success
    274 template <typename I>
    275 bool ReadIntPairs(const string& filename,
    276                     vector<pair<I, I> >* pairs,
    277                     bool allow_negative = false) {
    278   ifstream strm(filename.c_str());
    279 
    280   if (!strm) {
    281     LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename;
    282     return false;
    283   }
    284 
    285   const int kLineLen = 8096;
    286   char line[kLineLen];
    287   size_t nline = 0;
    288 
    289   pairs->clear();
    290   while (strm.getline(line, kLineLen)) {
    291     ++nline;
    292     vector<char *> col;
    293     SplitToVector(line, "\n\t ", &col, true);
    294     // empty line or comment?
    295     if (col.size() == 0 || col[0][0] == '\0' || col[0][0] == '#')
    296       continue;
    297     if (col.size() != 2) {
    298       LOG(ERROR) << "ReadIntPairs: Bad number of columns, "
    299                  << "file = " << filename << ", line = " << nline;
    300       return false;
    301     }
    302 
    303     bool err;
    304     I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err);
    305     if (err) return false;
    306     I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err);
    307     if (err) return false;
    308     pairs->push_back(make_pair(i1, i2));
    309   }
    310   return true;
    311 }
    312 
    313 // Returns true on success
    314 template <typename I>
    315 bool WriteIntPairs(const string& filename,
    316                    const vector<pair<I, I> >& pairs) {
    317   ostream *strm = &cout;
    318   if (!filename.empty()) {
    319     strm = new ofstream(filename.c_str());
    320     if (!*strm) {
    321       LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename;
    322       return false;
    323     }
    324   }
    325 
    326   for (ssize_t n = 0; n < pairs.size(); ++n)
    327     *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
    328 
    329   if (!*strm) {
    330     LOG(ERROR) << "WriteIntPairs: Write failed: "
    331                << (filename.empty() ? "standard output" : filename);
    332     return false;
    333   }
    334   if (strm != &cout)
    335     delete strm;
    336   return true;
    337 }
    338 
    339 // Utilities for reading/writing label pairs
    340 
    341 template <typename Label>
    342 bool ReadLabelPairs(const string& filename,
    343                     vector<pair<Label, Label> >* pairs,
    344                     bool allow_negative = false) {
    345   return ReadIntPairs(filename, pairs, allow_negative);
    346 }
    347 
    348 template <typename Label>
    349 bool WriteLabelPairs(const string& filename,
    350                      vector<pair<Label, Label> >& pairs) {
    351   return WriteIntPairs(filename, pairs);
    352 }
    353 
    354 // Utilities for converting a type name to a legal C symbol.
    355 
    356 void ConvertToLegalCSymbol(string *s);
    357 
    358 
    359 //
    360 // UTILITIES FOR STREAM I/O
    361 //
    362 
    363 bool AlignInput(istream &strm);
    364 bool AlignOutput(ostream &strm);
    365 
    366 //
    367 // UTILITIES FOR PROTOCOL BUFFER I/O
    368 //
    369 
    370 
    371 // An associative container for which testing membership is
    372 // faster than an STL set if members are restricted to an interval
    373 // that excludes most non-members. A 'Key' must have ==, !=, and < defined.
    374 // Element 'NoKey' should be a key that marks an uninitialized key and
    375 // is otherwise unused. 'Find()' returns an STL const_iterator to the match
    376 // found, otherwise it equals 'End()'.
    377 template <class Key, Key NoKey>
    378 class CompactSet {
    379 public:
    380   typedef typename set<Key>::const_iterator const_iterator;
    381 
    382   CompactSet()
    383     : min_key_(NoKey),
    384       max_key_(NoKey) { }
    385 
    386   CompactSet(const CompactSet<Key, NoKey> &compact_set)
    387     : set_(compact_set.set_),
    388       min_key_(compact_set.min_key_),
    389       max_key_(compact_set.max_key_) { }
    390 
    391   void Insert(Key key) {
    392     set_.insert(key);
    393     if (min_key_ == NoKey || key < min_key_)
    394       min_key_ = key;
    395     if (max_key_ == NoKey || max_key_ < key)
    396         max_key_ = key;
    397   }
    398 
    399   void Erase(Key key) {
    400     set_.erase(key);
    401     if (set_.empty()) {
    402         min_key_ = max_key_ = NoKey;
    403     } else if (key == min_key_) {
    404       ++min_key_;
    405     } else if (key == max_key_) {
    406       --max_key_;
    407     }
    408   }
    409 
    410   void Clear() {
    411     set_.clear();
    412     min_key_ = max_key_ = NoKey;
    413   }
    414 
    415   const_iterator Find(Key key) const {
    416     if (min_key_ == NoKey ||
    417         key < min_key_ || max_key_ < key)
    418       return set_.end();
    419     else
    420       return set_.find(key);
    421   }
    422 
    423   bool Member(Key key) const {
    424     if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
    425       return false;   // out of range
    426     } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) {
    427       return true;    // dense range
    428     } else {
    429       return set_.find(key) != set_.end();
    430     }
    431   }
    432 
    433   const_iterator Begin() const { return set_.begin(); }
    434 
    435   const_iterator End() const { return set_.end(); }
    436 
    437   // All stored keys are greater than or equal to this value.
    438   Key LowerBound() const { return min_key_; }
    439 
    440   // All stored keys are less than or equal to this value.
    441   Key UpperBound() const { return max_key_; }
    442 
    443 private:
    444   set<Key> set_;
    445   Key min_key_;
    446   Key max_key_;
    447 
    448   void operator=(const CompactSet<Key, NoKey> &);  //disallow
    449 };
    450 
    451 }  // namespace fst
    452 
    453 #endif  // FST_LIB_UTIL_H__
    454