1 /* 2 * Copyright 2014 Google Inc. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef FLATBUFFERS_UTIL_H_ 18 #define FLATBUFFERS_UTIL_H_ 19 20 #include <fstream> 21 #include <iomanip> 22 #include <string> 23 #include <sstream> 24 #include <stdint.h> 25 #include <stdlib.h> 26 #include <assert.h> 27 #ifdef _WIN32 28 #ifndef WIN32_LEAN_AND_MEAN 29 #define WIN32_LEAN_AND_MEAN 30 #endif 31 #ifndef NOMINMAX 32 #define NOMINMAX 33 #endif 34 #include <windows.h> 35 #include <winbase.h> 36 #include <direct.h> 37 #else 38 #include <limits.h> 39 #endif 40 #include <sys/types.h> 41 #include <sys/stat.h> 42 43 #include "flatbuffers/base.h" 44 45 46 namespace flatbuffers { 47 48 // Convert an integer or floating point value to a string. 49 // In contrast to std::stringstream, "char" values are 50 // converted to a string of digits, and we don't use scientific notation. 51 template<typename T> std::string NumToString(T t) { 52 std::stringstream ss; 53 ss << t; 54 return ss.str(); 55 } 56 // Avoid char types used as character data. 
57 template<> inline std::string NumToString<signed char>(signed char t) { 58 return NumToString(static_cast<int>(t)); 59 } 60 template<> inline std::string NumToString<unsigned char>(unsigned char t) { 61 return NumToString(static_cast<int>(t)); 62 } 63 #if defined(FLATBUFFERS_CPP98_STL) 64 template <> inline std::string NumToString<long long>(long long t) { 65 char buf[21]; // (log((1 << 63) - 1) / log(10)) + 2 66 snprintf(buf, sizeof(buf), "%lld", t); 67 return std::string(buf); 68 } 69 70 template <> inline std::string NumToString<unsigned long long>( 71 unsigned long long t) { 72 char buf[22]; // (log((1 << 63) - 1) / log(10)) + 1 73 snprintf(buf, sizeof(buf), "%llu", t); 74 return std::string(buf); 75 } 76 #endif // defined(FLATBUFFERS_CPP98_STL) 77 78 // Special versions for floats/doubles. 79 template<> inline std::string NumToString<double>(double t) { 80 // to_string() prints different numbers of digits for floats depending on 81 // platform and isn't available on Android, so we use stringstream 82 std::stringstream ss; 83 // Use std::fixed to surpress scientific notation. 84 ss << std::fixed << t; 85 auto s = ss.str(); 86 // Sadly, std::fixed turns "1" into "1.00000", so here we undo that. 87 auto p = s.find_last_not_of('0'); 88 if (p != std::string::npos) { 89 // Strip trailing zeroes. If it is a whole number, keep one zero. 90 s.resize(p + (s[p] == '.' ? 2 : 1)); 91 } 92 return s; 93 } 94 template<> inline std::string NumToString<float>(float t) { 95 return NumToString(static_cast<double>(t)); 96 } 97 98 // Convert an integer value to a hexadecimal string. 99 // The returned string length is always xdigits long, prefixed by 0 digits. 100 // For example, IntToStringHex(0x23, 8) returns the string "00000023". 
inline std::string IntToStringHex(int i, int xdigits) {
  std::stringstream ss;
  // std::setw only establishes a minimum field width: values that need more
  // than xdigits digits are printed in full rather than truncated.
  ss << std::setw(xdigits)
     << std::setfill('0')
     << std::hex
     << std::uppercase
     << i;
  return ss.str();
}

// Portable implementation of strtoll().
// "endptr" (optional) receives the position where parsing stopped; "base" is
// forwarded unchanged to the underlying C runtime function.
inline int64_t StringToInt(const char *str, char **endptr = nullptr,
                           int base = 10) {
  #ifdef _MSC_VER
    return _strtoi64(str, endptr, base);
  #else
    return strtoll(str, endptr, base);
  #endif
}

// Portable implementation of strtoull().
// Same parameters as StringToInt, but the result is unsigned.
inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
                             int base = 10) {
  #ifdef _MSC_VER
    return _strtoui64(str, endptr, base);
  #else
    return strtoull(str, endptr, base);
  #endif
}

// Signatures of the replaceable file access hooks.
typedef bool (*LoadFileFunction)(const char *filename, bool binary,
                                 std::string *dest);
typedef bool (*FileExistsFunction)(const char *filename);

// NOTE(review): both setters are only declared here. Presumably they install
// the given hook and return the previously installed one - confirm against
// the definition.
LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);

FileExistsFunction SetFileExistsFunction(FileExistsFunction
                                         file_exists_function);


// Check if file "name" exists.
bool FileExists(const char *name);

// Check if "name" exists and it is also a directory.
bool DirExists(const char *name);

// Load file "name" into "buf" returning true if successful
// false otherwise.  If "binary" is false data is read
// using ifstream's text mode, otherwise data is read with
// no transcoding.
bool LoadFile(const char *name, bool binary, std::string *buf);

// Save data "buf" of length "len" bytes into a file
// "name" returning true if successful, false otherwise.
// If "binary" is false data is written using ofstream's
// text mode, otherwise data is written with no
// transcoding.
inline bool SaveFile(const char *name, const char *buf, size_t len,
                     bool binary) {
  std::ofstream ofs(name, binary ? std::ofstream::binary : std::ofstream::out);
  if (!ofs.is_open()) return false;
  ofs.write(buf, static_cast<std::streamsize>(len));
  // Close explicitly before checking the stream state: the data may still be
  // sitting in the stream buffer, and a failure to flush it would otherwise
  // only happen (unnoticed) in the destructor.
  ofs.close();
  return !ofs.fail();
}

// Save data "buf" into file "name" returning true if
// successful, false otherwise.  If "binary" is false
// data is written using ofstream's text mode, otherwise
// data is written with no transcoding.
inline bool SaveFile(const char *name, const std::string &buf, bool binary) {
  return SaveFile(name, buf.c_str(), buf.size(), binary);
}

// Functionality for minimalistic portable path handling.

// The functions below behave correctly regardless of whether posix ('/') or
// Windows ('/' or '\\') separators are used.

// Any new separators inserted are always posix.

// We internally store paths in posix format ('/'). Paths supplied
// by the user should go through PosixPath to ensure correct behavior
// on Windows when paths are string-compared.

static const char kPathSeparator = '/';
static const char kPathSeparatorWindows = '\\';
static const char *PathSeparatorSet = "\\/";  // Intentionally no ':'

// Returns the path with the extension, if any, removed.
// Only a '.' inside the last path component counts as the start of an
// extension, so "dir.d/file" and "./file" are returned unchanged.
inline std::string StripExtension(const std::string &filepath) {
  const size_t dot = filepath.find_last_of('.');
  if (dot == std::string::npos) return filepath;
  const size_t separator = filepath.find_last_of(PathSeparatorSet);
  // A separator after the dot means the dot belongs to a directory name.
  if (separator != std::string::npos && separator > dot) return filepath;
  return filepath.substr(0, dot);
}

// Returns the extension, if any (without the leading '.').
// Returns an empty string when the last path component contains no '.'.
inline std::string GetExtension(const std::string &filepath) {
  const size_t dot = filepath.find_last_of('.');
  if (dot == std::string::npos) return "";
  const size_t separator = filepath.find_last_of(PathSeparatorSet);
  // A separator after the dot means the dot belongs to a directory name.
  if (separator != std::string::npos && separator > dot) return "";
  return filepath.substr(dot + 1);
}

// Return the last component of the path, after the last separator.
// A path without any separator is returned unchanged.
inline std::string StripPath(const std::string &filepath) {
  size_t i = filepath.find_last_of(PathSeparatorSet);
  return i != std::string::npos ? filepath.substr(i + 1) : filepath;
}

// Strip the last component of the path + separator.
208 inline std::string StripFileName(const std::string &filepath) { 209 size_t i = filepath.find_last_of(PathSeparatorSet); 210 return i != std::string::npos ? filepath.substr(0, i) : ""; 211 } 212 213 // Concatenates a path with a filename, regardless of wether the path 214 // ends in a separator or not. 215 inline std::string ConCatPathFileName(const std::string &path, 216 const std::string &filename) { 217 std::string filepath = path; 218 if (filepath.length()) { 219 char filepath_last_character = string_back(filepath); 220 if (filepath_last_character == kPathSeparatorWindows) { 221 filepath_last_character = kPathSeparator; 222 } else if (filepath_last_character != kPathSeparator) { 223 filepath += kPathSeparator; 224 } 225 } 226 filepath += filename; 227 return filepath; 228 } 229 230 // Replaces any '\\' separators with '/' 231 inline std::string PosixPath(const char *path) { 232 std::string p = path; 233 std::replace(p.begin(), p.end(), '\\', '/'); 234 return p; 235 } 236 237 // This function ensure a directory exists, by recursively 238 // creating dirs for any parts of the path that don't exist yet. 239 inline void EnsureDirExists(const std::string &filepath) { 240 auto parent = StripFileName(filepath); 241 if (parent.length()) EnsureDirExists(parent); 242 #ifdef _WIN32 243 (void)_mkdir(filepath.c_str()); 244 #else 245 mkdir(filepath.c_str(), S_IRWXU|S_IRGRP|S_IXGRP); 246 #endif 247 } 248 249 // Obtains the absolute path from any other path. 250 // Returns the input path if the absolute path couldn't be resolved. 251 inline std::string AbsolutePath(const std::string &filepath) { 252 #ifdef FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION 253 return filepath; 254 #else 255 #ifdef _WIN32 256 char abs_path[MAX_PATH]; 257 return GetFullPathNameA(filepath.c_str(), MAX_PATH, abs_path, nullptr) 258 #else 259 char abs_path[PATH_MAX]; 260 return realpath(filepath.c_str(), abs_path) 261 #endif 262 ? 
abs_path 263 : filepath; 264 #endif // FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION 265 } 266 267 // To and from UTF-8 unicode conversion functions 268 269 // Convert a unicode code point into a UTF-8 representation by appending it 270 // to a string. Returns the number of bytes generated. 271 inline int ToUTF8(uint32_t ucc, std::string *out) { 272 assert(!(ucc & 0x80000000)); // Top bit can't be set. 273 // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8 274 for (int i = 0; i < 6; i++) { 275 // Max bits this encoding can represent. 276 uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i); 277 if (ucc < (1u << max_bits)) { // does it fit? 278 // Remaining bits not encoded in the first byte, store 6 bits each 279 uint32_t remain_bits = i * 6; 280 // Store first byte: 281 (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) | 282 (ucc >> remain_bits)); 283 // Store remaining bytes: 284 for (int j = i - 1; j >= 0; j--) { 285 (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80); 286 } 287 return i + 1; // Return the number of bytes added. 288 } 289 } 290 assert(0); // Impossible to arrive here. 291 return -1; 292 } 293 294 // Converts whatever prefix of the incoming string corresponds to a valid 295 // UTF-8 sequence into a unicode code. The incoming pointer will have been 296 // advanced past all bytes parsed. 297 // returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in 298 // this case). 299 inline int FromUTF8(const char **in) { 300 int len = 0; 301 // Count leading 1 bits. 302 for (int mask = 0x80; mask >= 0x04; mask >>= 1) { 303 if (**in & mask) { 304 len++; 305 } else { 306 break; 307 } 308 } 309 if ((**in << len) & 0x80) return -1; // Bit after leading 1's must be 0. 310 if (!len) return *(*in)++; 311 // UTF-8 encoded values with a length are between 2 and 4 bytes. 312 if (len < 2 || len > 4) { 313 return -1; 314 } 315 // Grab initial bits of the code. 
316 int ucc = *(*in)++ & ((1 << (7 - len)) - 1); 317 for (int i = 0; i < len - 1; i++) { 318 if ((**in & 0xC0) != 0x80) return -1; // Upper bits must 1 0. 319 ucc <<= 6; 320 ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code. 321 } 322 // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for 323 // UTF-16 surrogate pairs). 324 if (ucc >= 0xD800 && ucc <= 0xDFFF) { 325 return -1; 326 } 327 // UTF-8 must represent code points in their shortest possible encoding. 328 switch (len) { 329 case 2: 330 // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF. 331 if (ucc < 0x0080 || ucc > 0x07FF) { 332 return -1; 333 } 334 break; 335 case 3: 336 // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF. 337 if (ucc < 0x0800 || ucc > 0xFFFF) { 338 return -1; 339 } 340 break; 341 case 4: 342 // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF. 343 if (ucc < 0x10000 || ucc > 0x10FFFF) { 344 return -1; 345 } 346 break; 347 } 348 return ucc; 349 } 350 351 // Wraps a string to a maximum length, inserting new lines where necessary. Any 352 // existing whitespace will be collapsed down to a single space. A prefix or 353 // suffix can be provided, which will be inserted before or after a wrapped 354 // line, respectively. 
355 inline std::string WordWrap(const std::string in, size_t max_length, 356 const std::string wrapped_line_prefix, 357 const std::string wrapped_line_suffix) { 358 std::istringstream in_stream(in); 359 std::string wrapped, line, word; 360 361 in_stream >> word; 362 line = word; 363 364 while (in_stream >> word) { 365 if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) < 366 max_length) { 367 line += " " + word; 368 } else { 369 wrapped += line + wrapped_line_suffix + "\n"; 370 line = wrapped_line_prefix + word; 371 } 372 } 373 wrapped += line; 374 375 return wrapped; 376 } 377 378 inline bool EscapeString(const char *s, size_t length, std::string *_text, 379 bool allow_non_utf8) { 380 std::string &text = *_text; 381 text += "\""; 382 for (uoffset_t i = 0; i < length; i++) { 383 char c = s[i]; 384 switch (c) { 385 case '\n': text += "\\n"; break; 386 case '\t': text += "\\t"; break; 387 case '\r': text += "\\r"; break; 388 case '\b': text += "\\b"; break; 389 case '\f': text += "\\f"; break; 390 case '\"': text += "\\\""; break; 391 case '\\': text += "\\\\"; break; 392 default: 393 if (c >= ' ' && c <= '~') { 394 text += c; 395 } else { 396 // Not printable ASCII data. Let's see if it's valid UTF-8 first: 397 const char *utf8 = s + i; 398 int ucc = FromUTF8(&utf8); 399 if (ucc < 0) { 400 if (allow_non_utf8) { 401 text += "\\x"; 402 text += IntToStringHex(static_cast<uint8_t>(c), 2); 403 } else { 404 // There are two cases here: 405 // 406 // 1) We reached here by parsing an IDL file. In that case, 407 // we previously checked for non-UTF-8, so we shouldn't reach 408 // here. 409 // 410 // 2) We reached here by someone calling GenerateText() 411 // on a previously-serialized flatbuffer. The data might have 412 // non-UTF-8 Strings, or might be corrupt. 413 // 414 // In both cases, we have to give up and inform the caller 415 // they have no JSON. 
416 return false; 417 } 418 } else { 419 if (ucc <= 0xFFFF) { 420 // Parses as Unicode within JSON's \uXXXX range, so use that. 421 text += "\\u"; 422 text += IntToStringHex(ucc, 4); 423 } else if (ucc <= 0x10FFFF) { 424 // Encode Unicode SMP values to a surrogate pair using two \u escapes. 425 uint32_t base = ucc - 0x10000; 426 auto high_surrogate = (base >> 10) + 0xD800; 427 auto low_surrogate = (base & 0x03FF) + 0xDC00; 428 text += "\\u"; 429 text += IntToStringHex(high_surrogate, 4); 430 text += "\\u"; 431 text += IntToStringHex(low_surrogate, 4); 432 } 433 // Skip past characters recognized. 434 i = static_cast<uoffset_t>(utf8 - s - 1); 435 } 436 } 437 break; 438 } 439 } 440 text += "\""; 441 return true; 442 } 443 444 } // namespace flatbuffers 445 446 #endif // FLATBUFFERS_UTIL_H_ 447