Home | History | Annotate | Download | only in fst
      1 // util.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // FST utility inline definitions.
     20 
     21 #ifndef FST_LIB_UTIL_H__
     22 #define FST_LIB_UTIL_H__
     23 
     24 #include <unordered_map>
     25 using std::tr1::unordered_map;
     26 using std::tr1::unordered_multimap;
     27 #include <unordered_set>
     28 using std::tr1::unordered_set;
     29 using std::tr1::unordered_multiset;
     30 #include <list>
     31 #include <map>
     32 #include <set>
     33 #include <sstream>
     34 #include <string>
     35 #include <vector>
     36 using std::vector;
     37 
     38 
     39 #include <fst/compat.h>
     40 #include <fst/types.h>
     41 
     42 #include <iostream>
     43 #include <fstream>
     44 
     45 //
     46 // UTILITY FOR ERROR HANDLING
     47 //
     48 
// Declares the boolean flag read below as FLAGS_fst_error_fatal.
DECLARE_bool(fst_error_fatal);

// Selects the log stream used to report an FST error: LOG(FATAL) when
// FLAGS_fst_error_fatal is true, LOG(ERROR) otherwise. Used in this file as
//   FSTERROR() << "message";
#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
     52 
     53 namespace fst {
     54 
     55 //
     56 // UTILITIES FOR TYPE I/O
     57 //
     58 
     59 // Read some types from an input stream.
     60 
     61 // Generic case.
     62 template <typename T>
     63 inline istream &ReadType(istream &strm, T *t) {
     64   return t->Read(strm);
     65 }
     66 
     67 // Fixed size, contiguous memory read.
     68 #define READ_POD_TYPE(T)                                    \
     69 inline istream &ReadType(istream &strm, T *t) {             \
     70   return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \
     71 }
     72 
     73 READ_POD_TYPE(bool);
     74 READ_POD_TYPE(char);
     75 READ_POD_TYPE(signed char);
     76 READ_POD_TYPE(unsigned char);
     77 READ_POD_TYPE(short);
     78 READ_POD_TYPE(unsigned short);
     79 READ_POD_TYPE(int);
     80 READ_POD_TYPE(unsigned int);
     81 READ_POD_TYPE(long);
     82 READ_POD_TYPE(unsigned long);
     83 READ_POD_TYPE(long long);
     84 READ_POD_TYPE(unsigned long long);
     85 READ_POD_TYPE(float);
     86 READ_POD_TYPE(double);
     87 
     88 // String case.
     89 inline istream &ReadType(istream &strm, string *s) {
     90   s->clear();
     91   int32 ns = 0;
     92   strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
     93   for (int i = 0; i < ns; ++i) {
     94     char c;
     95     strm.read(&c, 1);
     96     *s += c;
     97   }
     98   return strm;
     99 }
    100 
    101 // Pair case.
    102 template <typename S, typename T>
    103 inline istream &ReadType(istream &strm, pair<S, T> *p) {
    104   ReadType(strm, &p->first);
    105   ReadType(strm, &p->second);
    106   return strm;
    107 }
    108 
    109 template <typename S, typename T>
    110 inline istream &ReadType(istream &strm, pair<const S, T> *p) {
    111   ReadType(strm, const_cast<S *>(&p->first));
    112   ReadType(strm, &p->second);
    113   return strm;
    114 }
    115 
    116 // General case - no-op.
    117 template <typename C>
    118 void StlReserve(C *c, int64 n) {}
    119 
    120 // Specialization for vectors.
    121 template <typename S, typename T>
    122 void StlReserve(vector<S, T> *c, int64 n) {
    123   c->reserve(n);
    124 }
    125 
    126 // STL sequence container.
    127 #define READ_STL_SEQ_TYPE(C)                             \
    128 template <typename S, typename T>                        \
    129 inline istream &ReadType(istream &strm, C<S, T> *c) {    \
    130   c->clear();                                            \
    131   int64 n = 0;                                           \
    132   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
    133   StlReserve(c, n);                                      \
    134   for (ssize_t i = 0; i < n; ++i) {                      \
    135     typename C<S, T>::value_type value;                  \
    136     ReadType(strm, &value);                              \
    137     c->insert(c->end(), value);                          \
    138   }                                                      \
    139   return strm;                                           \
    140 }
    141 
    142 READ_STL_SEQ_TYPE(vector);
    143 READ_STL_SEQ_TYPE(list);
    144 
    145 // STL associative container.
    146 #define READ_STL_ASSOC_TYPE(C)                           \
    147 template <typename S, typename T, typename U>            \
    148 inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
    149   c->clear();                                            \
    150   int64 n = 0;                                           \
    151   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
    152   for (ssize_t i = 0; i < n; ++i) {                      \
    153     typename C<S, T, U>::value_type value;               \
    154     ReadType(strm, &value);                              \
    155     c->insert(value);                                    \
    156   }                                                      \
    157   return strm;                                           \
    158 }
    159 
    160 READ_STL_ASSOC_TYPE(set);
    161 READ_STL_ASSOC_TYPE(unordered_set);
    162 READ_STL_ASSOC_TYPE(map);
    163 READ_STL_ASSOC_TYPE(unordered_map);
    164 
    165 // Write some types to an output stream.
    166 
    167 // Generic case.
    168 template <typename T>
    169 inline ostream &WriteType(ostream &strm, const T t) {
    170   t.Write(strm);
    171   return strm;
    172 }
    173 
    174 // Fixed size, contiguous memory write.
    175 #define WRITE_POD_TYPE(T)                                           \
    176 inline ostream &WriteType(ostream &strm, const T t) {               \
    177   return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \
    178 }
    179 
    180 WRITE_POD_TYPE(bool);
    181 WRITE_POD_TYPE(char);
    182 WRITE_POD_TYPE(signed char);
    183 WRITE_POD_TYPE(unsigned char);
    184 WRITE_POD_TYPE(short);
    185 WRITE_POD_TYPE(unsigned short);
    186 WRITE_POD_TYPE(int);
    187 WRITE_POD_TYPE(unsigned int);
    188 WRITE_POD_TYPE(long);
    189 WRITE_POD_TYPE(unsigned long);
    190 WRITE_POD_TYPE(long long);
    191 WRITE_POD_TYPE(unsigned long long);
    192 WRITE_POD_TYPE(float);
    193 WRITE_POD_TYPE(double);
    194 
    195 // String case.
    196 inline ostream &WriteType(ostream &strm, const string &s) {
    197   int32 ns = s.size();
    198   strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns));
    199   return strm.write(s.data(), ns);
    200 }
    201 
    202 // Pair case.
    203 template <typename S, typename T>
    204 inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
    205   WriteType(strm, p.first);
    206   WriteType(strm, p.second);
    207   return strm;
    208 }
    209 
    210 // STL sequence container.
    211 #define WRITE_STL_SEQ_TYPE(C)                                                \
    212 template <typename S, typename T>                                            \
    213 inline ostream &WriteType(ostream &strm, const C<S, T> &c) {                 \
    214   int64 n = c.size();                                                        \
    215   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
    216   for (typename C<S, T>::const_iterator it = c.begin();                      \
    217        it != c.end(); ++it)                                                  \
    218      WriteType(strm, *it);                                                   \
    219   return strm;                                                               \
    220 }
    221 
    222 WRITE_STL_SEQ_TYPE(vector);
    223 WRITE_STL_SEQ_TYPE(list);
    224 
    225 // STL associative container.
    226 #define WRITE_STL_ASSOC_TYPE(C)                                              \
    227 template <typename S, typename T, typename U>                                \
    228 inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) {              \
    229   int64 n = c.size();                                                        \
    230   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
    231   for (typename C<S, T, U>::const_iterator it = c.begin();                   \
    232        it != c.end(); ++it)                                                  \
    233      WriteType(strm, *it);                                                   \
    234   return strm;                                                               \
    235 }
    236 
    237 WRITE_STL_ASSOC_TYPE(set);
    238 WRITE_STL_ASSOC_TYPE(unordered_set);
    239 WRITE_STL_ASSOC_TYPE(map);
    240 WRITE_STL_ASSOC_TYPE(unordered_map);
    241 
    242 // Utilities for converting between int64 or Weight and string.
    243 
    244 int64 StrToInt64(const string &s, const string &src, size_t nline,
    245                  bool allow_negative, bool *error = 0);
    246 
    247 template <typename Weight>
    248 Weight StrToWeight(const string &s, const string &src, size_t nline) {
    249   Weight w;
    250   istringstream strm(s);
    251   strm >> w;
    252   if (!strm) {
    253     FSTERROR() << "StrToWeight: Bad weight = \"" << s
    254                << "\", source = " << src << ", line = " << nline;
    255     return Weight::NoWeight();
    256   }
    257   return w;
    258 }
    259 
    260 void Int64ToStr(int64 n, string *s);
    261 
    262 template <typename Weight>
    263 void WeightToStr(Weight w, string *s) {
    264   ostringstream strm;
    265   strm.precision(9);
    266   strm << w;
    267   *s += strm.str();
    268 }
    269 
    270 // Utilities for reading/writing label pairs
    271 
    272 // Returns true on success
    273 template <typename Label>
    274 bool ReadLabelPairs(const string& filename,
    275                     vector<pair<Label, Label> >* pairs,
    276                     bool allow_negative = false) {
    277   ifstream strm(filename.c_str());
    278 
    279   if (!strm) {
    280     LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
    281     return false;
    282   }
    283 
    284   const int kLineLen = 8096;
    285   char line[kLineLen];
    286   size_t nline = 0;
    287 
    288   pairs->clear();
    289   while (strm.getline(line, kLineLen)) {
    290     ++nline;
    291     vector<char *> col;
    292     SplitToVector(line, "\n\t ", &col, true);
    293     if (col.size() == 0 || col[0][0] == '\0')  // empty line
    294       continue;
    295     if (col.size() != 2) {
    296       LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
    297                  << "file = " << filename << ", line = " << nline;
    298       return false;
    299     }
    300 
    301     bool err;
    302     Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
    303     if (err) return false;
    304     Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
    305     if (err) return false;
    306     pairs->push_back(make_pair(frmlabel, tolabel));
    307   }
    308   return true;
    309 }
    310 
    311 // Returns true on success
    312 template <typename Label>
    313 bool WriteLabelPairs(const string& filename,
    314                      const vector<pair<Label, Label> >& pairs) {
    315   ostream *strm = &std::cout;
    316   if (!filename.empty()) {
    317     strm = new ofstream(filename.c_str());
    318     if (!*strm) {
    319       LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
    320       return false;
    321     }
    322   }
    323 
    324   for (ssize_t n = 0; n < pairs.size(); ++n)
    325     *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
    326 
    327   if (!*strm) {
    328     LOG(ERROR) << "WriteLabelPairs: Write failed: "
    329                << (filename.empty() ? "standard output" : filename);
    330     return false;
    331   }
    332   if (strm != &std::cout)
    333     delete strm;
    334   return true;
    335 }
    336 
    337 // Utilities for converting a type name to a legal C symbol.
    338 
    339 void ConvertToLegalCSymbol(string *s);
    340 
    341 
    342 //
    343 // UTILITIES FOR STREAM I/O
    344 //
    345 
    346 bool AlignInput(istream &strm, int align);
    347 bool AlignOutput(ostream &strm, int align);
    348 
    349 //
    350 // UTILITIES FOR PROTOCOL BUFFER I/O
    351 //
    352 
    353 
    354 // An associative container for which testing membership is
    355 // faster than an STL set if members are restricted to an interval
    356 // that excludes most non-members. A 'Key' must have ==, !=, and < defined.
    357 // Element 'NoKey' should be a key that marks an uninitialized key and
    358 // is otherwise unused. 'Find()' returns an STL const_iterator to the match
    359 // found, otherwise it equals 'End()'.
    360 template <class Key, Key NoKey>
    361 class CompactSet {
    362 public:
    363   typedef typename set<Key>::const_iterator const_iterator;
    364 
    365   CompactSet()
    366     : min_key_(NoKey),
    367       max_key_(NoKey) { }
    368 
    369   CompactSet(const CompactSet<Key, NoKey> &compact_set)
    370     : set_(compact_set.set_),
    371       min_key_(compact_set.min_key_),
    372       max_key_(compact_set.max_key_) { }
    373 
    374   void Insert(Key key) {
    375     set_.insert(key);
    376     if (min_key_ == NoKey || key < min_key_)
    377       min_key_ = key;
    378     if (max_key_ == NoKey || max_key_ < key)
    379         max_key_ = key;
    380   }
    381 
    382   void Clear() {
    383     set_.clear();
    384     min_key_ = max_key_ = NoKey;
    385   }
    386 
    387   const_iterator Find(Key key) const {
    388     if (min_key_ == NoKey ||
    389         key < min_key_ || max_key_ < key)
    390       return set_.end();
    391     else
    392       return set_.find(key);
    393   }
    394 
    395   const_iterator Begin() const { return set_.begin(); }
    396 
    397   const_iterator End() const { return set_.end(); }
    398 
    399 private:
    400   set<Key> set_;
    401   Key min_key_;
    402   Key max_key_;
    403 
    404   void operator=(const CompactSet<Key, NoKey> &);  //disallow
    405 };
    406 
    407 }  // namespace fst
    408 
    409 #endif  // FST_LIB_UTIL_H__
    410