Home | History | Annotate | Download | only in flatbuffers
      1 /*
      2  * Copyright 2014 Google Inc. All rights reserved.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef FLATBUFFERS_UTIL_H_
     18 #define FLATBUFFERS_UTIL_H_
     19 
     20 #include <fstream>
     21 #include <iomanip>
     22 #include <string>
     23 #include <sstream>
     24 #include <stdint.h>
     25 #include <stdlib.h>
     26 #include <assert.h>
     27 #ifdef _WIN32
     28 #ifndef WIN32_LEAN_AND_MEAN
     29   #define WIN32_LEAN_AND_MEAN
     30 #endif
     31 #ifndef NOMINMAX
     32   #define NOMINMAX
     33 #endif
     34 #include <windows.h>
     35 #include <winbase.h>
     36 #include <direct.h>
     37 #else
     38 #include <limits.h>
     39 #endif
     40 #include <sys/types.h>
     41 #include <sys/stat.h>
     42 
     43 #include "flatbuffers/base.h"
     44 
     45 
     46 namespace flatbuffers {
     47 
     48 // Convert an integer or floating point value to a string.
     49 // In contrast to std::stringstream, "char" values are
     50 // converted to a string of digits, and we don't use scientific notation.
     51 template<typename T> std::string NumToString(T t) {
     52   std::stringstream ss;
     53   ss << t;
     54   return ss.str();
     55 }
     56 // Avoid char types used as character data.
     57 template<> inline std::string NumToString<signed char>(signed char t) {
     58   return NumToString(static_cast<int>(t));
     59 }
     60 template<> inline std::string NumToString<unsigned char>(unsigned char t) {
     61   return NumToString(static_cast<int>(t));
     62 }
     63 #if defined(FLATBUFFERS_CPP98_STL)
     64   template <> inline std::string NumToString<long long>(long long t) {
     65     char buf[21]; // (log((1 << 63) - 1) / log(10)) + 2
     66     snprintf(buf, sizeof(buf), "%lld", t);
     67     return std::string(buf);
     68   }
     69 
     70   template <> inline std::string NumToString<unsigned long long>(
     71       unsigned long long t) {
     72     char buf[22]; // (log((1 << 63) - 1) / log(10)) + 1
     73     snprintf(buf, sizeof(buf), "%llu", t);
     74     return std::string(buf);
     75   }
     76 #endif  // defined(FLATBUFFERS_CPP98_STL)
     77 
     78 // Special versions for floats/doubles.
     79 template<> inline std::string NumToString<double>(double t) {
     80   // to_string() prints different numbers of digits for floats depending on
     81   // platform and isn't available on Android, so we use stringstream
     82   std::stringstream ss;
     83   // Use std::fixed to surpress scientific notation.
     84   ss << std::fixed << t;
     85   auto s = ss.str();
     86   // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
     87   auto p = s.find_last_not_of('0');
     88   if (p != std::string::npos) {
     89     // Strip trailing zeroes. If it is a whole number, keep one zero.
     90     s.resize(p + (s[p] == '.' ? 2 : 1));
     91   }
     92   return s;
     93 }
     94 template<> inline std::string NumToString<float>(float t) {
     95   return NumToString(static_cast<double>(t));
     96 }
     97 
     98 // Convert an integer value to a hexadecimal string.
     99 // The returned string length is always xdigits long, prefixed by 0 digits.
    100 // For example, IntToStringHex(0x23, 8) returns the string "00000023".
    101 inline std::string IntToStringHex(int i, int xdigits) {
    102   std::stringstream ss;
    103   ss << std::setw(xdigits)
    104      << std::setfill('0')
    105      << std::hex
    106      << std::uppercase
    107      << i;
    108   return ss.str();
    109 }
    110 
    111 // Portable implementation of strtoll().
    112 inline int64_t StringToInt(const char *str, char **endptr = nullptr,
    113                            int base = 10) {
    114   #ifdef _MSC_VER
    115     return _strtoi64(str, endptr, base);
    116   #else
    117     return strtoll(str, endptr, base);
    118   #endif
    119 }
    120 
    121 // Portable implementation of strtoull().
    122 inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
    123                              int base = 10) {
    124   #ifdef _MSC_VER
    125     return _strtoui64(str, endptr, base);
    126   #else
    127     return strtoull(str, endptr, base);
    128   #endif
    129 }
    130 
// Pointer type for a file-loading hook. It has the same parameter list and
// return type as LoadFile() declared in this header.
typedef bool (*LoadFileFunction)(const char *filename, bool binary,
                                 std::string *dest);
// Pointer type for a file-existence hook. It has the same parameter list and
// return type as FileExists() declared in this header.
typedef bool (*FileExistsFunction)(const char *filename);

// Takes a LoadFileFunction and returns one.
// NOTE(review): defined outside this header; presumably installs the hook
// used by LoadFile() and returns the previously installed one -- confirm
// against the implementation.
LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);

// Takes a FileExistsFunction and returns one.
// NOTE(review): defined outside this header; presumably installs the hook
// used by FileExists() and returns the previously installed one -- confirm
// against the implementation.
FileExistsFunction SetFileExistsFunction(FileExistsFunction
                                         file_exists_function);
    139 
    140 
    141 // Check if file "name" exists.
    142 bool FileExists(const char *name);
    143 
    144 // Check if "name" exists and it is also a directory.
    145 bool DirExists(const char *name);
    146 
    147 // Load file "name" into "buf" returning true if successful
    148 // false otherwise.  If "binary" is false data is read
    149 // using ifstream's text mode, otherwise data is read with
    150 // no transcoding.
    151 bool LoadFile(const char *name, bool binary, std::string *buf);
    152 
    153 // Save data "buf" of length "len" bytes into a file
    154 // "name" returning true if successful, false otherwise.
    155 // If "binary" is false data is written using ifstream's
    156 // text mode, otherwise data is written with no
    157 // transcoding.
    158 inline bool SaveFile(const char *name, const char *buf, size_t len,
    159                      bool binary) {
    160   std::ofstream ofs(name, binary ? std::ofstream::binary : std::ofstream::out);
    161   if (!ofs.is_open()) return false;
    162   ofs.write(buf, len);
    163   return !ofs.bad();
    164 }
    165 
    166 // Save data "buf" into file "name" returning true if
    167 // successful, false otherwise.  If "binary" is false
    168 // data is written using ifstream's text mode, otherwise
    169 // data is written with no transcoding.
    170 inline bool SaveFile(const char *name, const std::string &buf, bool binary) {
    171   return SaveFile(name, buf.c_str(), buf.size(), binary);
    172 }
    173 
    174 // Functionality for minimalistic portable path handling.
    175 
    176 // The functions below behave correctly regardless of whether posix ('/') or
    177 // Windows ('/' or '\\') separators are used.
    178 
    179 // Any new separators inserted are always posix.
    180 
    181 // We internally store paths in posix format ('/'). Paths supplied
    182 // by the user should go through PosixPath to ensure correct behavior
    183 // on Windows when paths are string-compared.
    184 
    185 static const char kPathSeparator = '/';
    186 static const char kPathSeparatorWindows = '\\';
    187 static const char *PathSeparatorSet = "\\/";  // Intentionally no ':'
    188 
    189 // Returns the path with the extension, if any, removed.
    190 inline std::string StripExtension(const std::string &filepath) {
    191   size_t i = filepath.find_last_of(".");
    192   return i != std::string::npos ? filepath.substr(0, i) : filepath;
    193 }
    194 
    195 // Returns the extension, if any.
    196 inline std::string GetExtension(const std::string &filepath) {
    197   size_t i = filepath.find_last_of(".");
    198   return i != std::string::npos ? filepath.substr(i + 1) : "";
    199 }
    200 
    201 // Return the last component of the path, after the last separator.
    202 inline std::string StripPath(const std::string &filepath) {
    203   size_t i = filepath.find_last_of(PathSeparatorSet);
    204   return i != std::string::npos ? filepath.substr(i + 1) : filepath;
    205 }
    206 
    207 // Strip the last component of the path + separator.
    208 inline std::string StripFileName(const std::string &filepath) {
    209   size_t i = filepath.find_last_of(PathSeparatorSet);
    210   return i != std::string::npos ? filepath.substr(0, i) : "";
    211 }
    212 
    213 // Concatenates a path with a filename, regardless of wether the path
    214 // ends in a separator or not.
    215 inline std::string ConCatPathFileName(const std::string &path,
    216                                       const std::string &filename) {
    217   std::string filepath = path;
    218   if (filepath.length()) {
    219     char filepath_last_character = string_back(filepath);
    220     if (filepath_last_character == kPathSeparatorWindows) {
    221       filepath_last_character = kPathSeparator;
    222     } else if (filepath_last_character != kPathSeparator) {
    223       filepath += kPathSeparator;
    224     }
    225   }
    226   filepath += filename;
    227   return filepath;
    228 }
    229 
    230 // Replaces any '\\' separators with '/'
    231 inline std::string PosixPath(const char *path) {
    232   std::string p = path;
    233   std::replace(p.begin(), p.end(), '\\', '/');
    234   return p;
    235 }
    236 
    237 // This function ensure a directory exists, by recursively
    238 // creating dirs for any parts of the path that don't exist yet.
    239 inline void EnsureDirExists(const std::string &filepath) {
    240   auto parent = StripFileName(filepath);
    241   if (parent.length()) EnsureDirExists(parent);
    242   #ifdef _WIN32
    243     (void)_mkdir(filepath.c_str());
    244   #else
    245     mkdir(filepath.c_str(), S_IRWXU|S_IRGRP|S_IXGRP);
    246   #endif
    247 }
    248 
    249 // Obtains the absolute path from any other path.
    250 // Returns the input path if the absolute path couldn't be resolved.
    251 inline std::string AbsolutePath(const std::string &filepath) {
    252   #ifdef FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION
    253     return filepath;
    254   #else
    255     #ifdef _WIN32
    256       char abs_path[MAX_PATH];
    257       return GetFullPathNameA(filepath.c_str(), MAX_PATH, abs_path, nullptr)
    258     #else
    259       char abs_path[PATH_MAX];
    260       return realpath(filepath.c_str(), abs_path)
    261     #endif
    262       ? abs_path
    263       : filepath;
    264   #endif // FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION
    265 }
    266 
    267 // To and from UTF-8 unicode conversion functions
    268 
    269 // Convert a unicode code point into a UTF-8 representation by appending it
    270 // to a string. Returns the number of bytes generated.
    271 inline int ToUTF8(uint32_t ucc, std::string *out) {
    272   assert(!(ucc & 0x80000000));  // Top bit can't be set.
    273   // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
    274   for (int i = 0; i < 6; i++) {
    275     // Max bits this encoding can represent.
    276     uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
    277     if (ucc < (1u << max_bits)) {  // does it fit?
    278       // Remaining bits not encoded in the first byte, store 6 bits each
    279       uint32_t remain_bits = i * 6;
    280       // Store first byte:
    281       (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) |
    282                                  (ucc >> remain_bits));
    283       // Store remaining bytes:
    284       for (int j = i - 1; j >= 0; j--) {
    285         (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
    286       }
    287       return i + 1;  // Return the number of bytes added.
    288     }
    289   }
    290   assert(0);  // Impossible to arrive here.
    291   return -1;
    292 }
    293 
    294 // Converts whatever prefix of the incoming string corresponds to a valid
    295 // UTF-8 sequence into a unicode code. The incoming pointer will have been
    296 // advanced past all bytes parsed.
    297 // returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
    298 // this case).
    299 inline int FromUTF8(const char **in) {
    300   int len = 0;
    301   // Count leading 1 bits.
    302   for (int mask = 0x80; mask >= 0x04; mask >>= 1) {
    303     if (**in & mask) {
    304       len++;
    305     } else {
    306       break;
    307     }
    308   }
    309   if ((**in << len) & 0x80) return -1;  // Bit after leading 1's must be 0.
    310   if (!len) return *(*in)++;
    311   // UTF-8 encoded values with a length are between 2 and 4 bytes.
    312   if (len < 2 || len > 4) {
    313     return -1;
    314   }
    315   // Grab initial bits of the code.
    316   int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
    317   for (int i = 0; i < len - 1; i++) {
    318     if ((**in & 0xC0) != 0x80) return -1;  // Upper bits must 1 0.
    319     ucc <<= 6;
    320     ucc |= *(*in)++ & 0x3F;  // Grab 6 more bits of the code.
    321   }
    322   // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
    323   // UTF-16 surrogate pairs).
    324   if (ucc >= 0xD800 && ucc <= 0xDFFF) {
    325     return -1;
    326   }
    327   // UTF-8 must represent code points in their shortest possible encoding.
    328   switch (len) {
    329     case 2:
    330       // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
    331       if (ucc < 0x0080 || ucc > 0x07FF) {
    332         return -1;
    333       }
    334       break;
    335     case 3:
    336       // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
    337       if (ucc < 0x0800 || ucc > 0xFFFF) {
    338         return -1;
    339       }
    340       break;
    341     case 4:
    342       // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF.
    343       if (ucc < 0x10000 || ucc > 0x10FFFF) {
    344         return -1;
    345       }
    346       break;
    347   }
    348   return ucc;
    349 }
    350 
    351 // Wraps a string to a maximum length, inserting new lines where necessary. Any
    352 // existing whitespace will be collapsed down to a single space. A prefix or
    353 // suffix can be provided, which will be inserted before or after a wrapped
    354 // line, respectively.
    355 inline std::string WordWrap(const std::string in, size_t max_length,
    356                             const std::string wrapped_line_prefix,
    357                             const std::string wrapped_line_suffix) {
    358   std::istringstream in_stream(in);
    359   std::string wrapped, line, word;
    360 
    361   in_stream >> word;
    362   line = word;
    363 
    364   while (in_stream >> word) {
    365     if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) <
    366         max_length) {
    367       line += " " + word;
    368     } else {
    369       wrapped += line + wrapped_line_suffix + "\n";
    370       line = wrapped_line_prefix + word;
    371     }
    372   }
    373   wrapped += line;
    374 
    375   return wrapped;
    376 }
    377 
    378 inline bool EscapeString(const char *s, size_t length, std::string *_text,
    379                          bool allow_non_utf8) {
    380   std::string &text = *_text;
    381   text += "\"";
    382   for (uoffset_t i = 0; i < length; i++) {
    383     char c = s[i];
    384     switch (c) {
    385       case '\n': text += "\\n"; break;
    386       case '\t': text += "\\t"; break;
    387       case '\r': text += "\\r"; break;
    388       case '\b': text += "\\b"; break;
    389       case '\f': text += "\\f"; break;
    390       case '\"': text += "\\\""; break;
    391       case '\\': text += "\\\\"; break;
    392       default:
    393         if (c >= ' ' && c <= '~') {
    394           text += c;
    395         } else {
    396           // Not printable ASCII data. Let's see if it's valid UTF-8 first:
    397           const char *utf8 = s + i;
    398           int ucc = FromUTF8(&utf8);
    399           if (ucc < 0) {
    400             if (allow_non_utf8) {
    401               text += "\\x";
    402               text += IntToStringHex(static_cast<uint8_t>(c), 2);
    403             } else {
    404               // There are two cases here:
    405               //
    406               // 1) We reached here by parsing an IDL file. In that case,
    407               // we previously checked for non-UTF-8, so we shouldn't reach
    408               // here.
    409               //
    410               // 2) We reached here by someone calling GenerateText()
    411               // on a previously-serialized flatbuffer. The data might have
    412               // non-UTF-8 Strings, or might be corrupt.
    413               //
    414               // In both cases, we have to give up and inform the caller
    415               // they have no JSON.
    416               return false;
    417             }
    418           } else {
    419             if (ucc <= 0xFFFF) {
    420               // Parses as Unicode within JSON's \uXXXX range, so use that.
    421               text += "\\u";
    422               text += IntToStringHex(ucc, 4);
    423             } else if (ucc <= 0x10FFFF) {
    424               // Encode Unicode SMP values to a surrogate pair using two \u escapes.
    425               uint32_t base = ucc - 0x10000;
    426               auto high_surrogate = (base >> 10) + 0xD800;
    427               auto low_surrogate = (base & 0x03FF) + 0xDC00;
    428               text += "\\u";
    429               text += IntToStringHex(high_surrogate, 4);
    430               text += "\\u";
    431               text += IntToStringHex(low_surrogate, 4);
    432             }
    433             // Skip past characters recognized.
    434             i = static_cast<uoffset_t>(utf8 - s - 1);
    435           }
    436         }
    437         break;
    438     }
    439   }
    440   text += "\"";
    441   return true;
    442 }
    443 
    444 }  // namespace flatbuffers
    445 
    446 #endif  // FLATBUFFERS_UTIL_H_
    447