1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 // from google3/strings/strutil.h 32 33 #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34 #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 36 #include <stdlib.h> 37 #include <vector> 38 #include <google/protobuf/stubs/common.h> 39 #include <google/protobuf/stubs/stringpiece.h> 40 41 namespace google { 42 namespace protobuf { 43 44 #ifdef _MSC_VER 45 #define strtoll _strtoi64 46 #define strtoull _strtoui64 47 #elif defined(__DECCXX) && defined(__osf__) 48 // HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 49 #define strtoll strtol 50 #define strtoull strtoul 51 #endif 52 53 // ---------------------------------------------------------------------- 54 // ascii_isalnum() 55 // Check if an ASCII character is alphanumeric. We can't use ctype's 56 // isalnum() because it is affected by locale. This function is applied 57 // to identifiers in the protocol buffer language, not to natural-language 58 // strings, so locale should not be taken into account. 59 // ascii_isdigit() 60 // Like above, but only accepts digits. 61 // ascii_isspace() 62 // Check if the character is a space character. 63 // ---------------------------------------------------------------------- 64 65 inline bool ascii_isalnum(char c) { 66 return ('a' <= c && c <= 'z') || 67 ('A' <= c && c <= 'Z') || 68 ('0' <= c && c <= '9'); 69 } 70 71 inline bool ascii_isdigit(char c) { 72 return ('0' <= c && c <= '9'); 73 } 74 75 inline bool ascii_isspace(char c) { 76 return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || 77 c == '\r'; 78 } 79 80 inline bool ascii_isupper(char c) { 81 return c >= 'A' && c <= 'Z'; 82 } 83 84 inline bool ascii_islower(char c) { 85 return c >= 'a' && c <= 'z'; 86 } 87 88 inline char ascii_toupper(char c) { 89 return ascii_islower(c) ? c - ('a' - 'A') : c; 90 } 91 92 inline char ascii_tolower(char c) { 93 return ascii_isupper(c) ? c + ('a' - 'A') : c; 94 } 95 96 inline int hex_digit_to_int(char c) { 97 /* Assume ASCII. 
*/ 98 int x = static_cast<unsigned char>(c); 99 if (x > '9') { 100 x += 9; 101 } 102 return x & 0xf; 103 } 104 105 // ---------------------------------------------------------------------- 106 // HasPrefixString() 107 // Check if a string begins with a given prefix. 108 // StripPrefixString() 109 // Given a string and a putative prefix, returns the string minus the 110 // prefix string if the prefix matches, otherwise the original 111 // string. 112 // ---------------------------------------------------------------------- 113 inline bool HasPrefixString(const string& str, 114 const string& prefix) { 115 return str.size() >= prefix.size() && 116 str.compare(0, prefix.size(), prefix) == 0; 117 } 118 119 inline string StripPrefixString(const string& str, const string& prefix) { 120 if (HasPrefixString(str, prefix)) { 121 return str.substr(prefix.size()); 122 } else { 123 return str; 124 } 125 } 126 127 // ---------------------------------------------------------------------- 128 // HasSuffixString() 129 // Return true if str ends in suffix. 130 // StripSuffixString() 131 // Given a string and a putative suffix, returns the string minus the 132 // suffix string if the suffix matches, otherwise the original 133 // string. 134 // ---------------------------------------------------------------------- 135 inline bool HasSuffixString(const string& str, 136 const string& suffix) { 137 return str.size() >= suffix.size() && 138 str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; 139 } 140 141 inline string StripSuffixString(const string& str, const string& suffix) { 142 if (HasSuffixString(str, suffix)) { 143 return str.substr(0, str.size() - suffix.size()); 144 } else { 145 return str; 146 } 147 } 148 149 // ---------------------------------------------------------------------- 150 // StripString 151 // Replaces any occurrence of the character 'remove' (or the characters 152 // in 'remove') with the character 'replacewith'. 
153 // Good for keeping html characters or protocol characters (\t) out 154 // of places where they might cause a problem. 155 // StripWhitespace 156 // Removes whitespaces from both ends of the given string. 157 // ---------------------------------------------------------------------- 158 LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove, 159 char replacewith); 160 161 LIBPROTOBUF_EXPORT void StripWhitespace(string* s); 162 163 164 // ---------------------------------------------------------------------- 165 // LowerString() 166 // UpperString() 167 // ToUpper() 168 // Convert the characters in "s" to lowercase or uppercase. ASCII-only: 169 // these functions intentionally ignore locale because they are applied to 170 // identifiers used in the Protocol Buffer language, not to natural-language 171 // strings. 172 // ---------------------------------------------------------------------- 173 174 inline void LowerString(string * s) { 175 string::iterator end = s->end(); 176 for (string::iterator i = s->begin(); i != end; ++i) { 177 // tolower() changes based on locale. We don't want this! 178 if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A'; 179 } 180 } 181 182 inline void UpperString(string * s) { 183 string::iterator end = s->end(); 184 for (string::iterator i = s->begin(); i != end; ++i) { 185 // toupper() changes based on locale. We don't want this! 186 if ('a' <= *i && *i <= 'z') *i += 'A' - 'a'; 187 } 188 } 189 190 inline string ToUpper(const string& s) { 191 string out = s; 192 UpperString(&out); 193 return out; 194 } 195 196 // ---------------------------------------------------------------------- 197 // StringReplace() 198 // Give me a string and two patterns "old" and "new", and I replace 199 // the first instance of "old" in the string with "new", if it 200 // exists. RETURN a new string, regardless of whether the replacement 201 // happened or not. 
// ----------------------------------------------------------------------

// NOTE(review): the description above only covers replacing the first
// instance of "oldsub"; presumably replace_all == true replaces every
// occurrence -- confirm against the out-of-line definition.
LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub,
                                        const string& newsub, bool replace_all);

// ----------------------------------------------------------------------
// SplitStringUsing()
//    Split a string using a character delimiter. Append the components
//    to 'result' (the "res" parameter).  If there are consecutive
//    delimiters, this function skips over all of them.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim,
                                         vector<string>* res);

// Split a string using one or more byte delimiters, presented
// as a nul-terminated c string. Append the components to 'result'.
// If there are consecutive delimiters, this function will return
// corresponding empty strings.  If you want to drop the empty
// strings, try SplitStringUsing().
//
// If "full" is the empty string, yields an empty string as the only value.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full,
                                              const char* delim,
                                              vector<string>* result);

// ----------------------------------------------------------------------
// Split()
//    Split a string using a character delimiter.
231 // ---------------------------------------------------------------------- 232 inline vector<string> Split( 233 const string& full, const char* delim, bool skip_empty = true) { 234 vector<string> result; 235 if (skip_empty) { 236 SplitStringUsing(full, delim, &result); 237 } else { 238 SplitStringAllowEmpty(full, delim, &result); 239 } 240 return result; 241 } 242 243 // ---------------------------------------------------------------------- 244 // JoinStrings() 245 // These methods concatenate a vector of strings into a C++ string, using 246 // the C-string "delim" as a separator between components. There are two 247 // flavors of the function, one flavor returns the concatenated string, 248 // another takes a pointer to the target string. In the latter case the 249 // target string is cleared and overwritten. 250 // ---------------------------------------------------------------------- 251 LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components, 252 const char* delim, string* result); 253 254 inline string JoinStrings(const vector<string>& components, 255 const char* delim) { 256 string result; 257 JoinStrings(components, delim, &result); 258 return result; 259 } 260 261 // ---------------------------------------------------------------------- 262 // UnescapeCEscapeSequences() 263 // Copies "source" to "dest", rewriting C-style escape sequences 264 // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII 265 // equivalents. "dest" must be sufficiently large to hold all 266 // the characters in the rewritten string (i.e. at least as large 267 // as strlen(source) + 1 should be safe, since the replacements 268 // are always shorter than the original escaped sequences). It's 269 // safe for source and dest to be the same. RETURNS the length 270 // of dest. 271 // 272 // It allows hex sequences \xhh, or generally \xhhhhh with an 273 // arbitrary number of hex digits, but all of them together must 274 // specify a value of a single byte (e.g. 
//    \x0045 is equivalent
//    to \x45, and \x1234 is erroneous).
//
//    It also allows escape sequences of the form \uhhhh (exactly four
//    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
//    hex digits, upper or lower case) to specify a Unicode code
//    point. The dest array will contain the UTF8-encoded version of
//    that code-point (e.g., if source contains \u2019, then dest will
//    contain the three bytes 0xE2, 0x80, and 0x99).
//
//    Errors: In the first form of the call, errors are reported with
//    LOG(ERROR). The same is true for the second form of the call if
//    the pointer to the string vector is NULL; otherwise, error
//    messages are stored in the vector. In either case, the effect on
//    the dest array is not defined, but rest of the source will be
//    processed.
//    ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
                                                vector<string> *errors);

// ----------------------------------------------------------------------
// UnescapeCEscapeString()
//    This does the same thing as UnescapeCEscapeSequences, but creates
//    a new string. The caller does not need to worry about allocating
//    a dest buffer. This should be used for non performance critical
//    tasks such as printing debug messages. It is safe for src and dest
//    to be the same.
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is NULL, it reports the errors with LOG().
//
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
                                             vector<string> *errors);
LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);

// ----------------------------------------------------------------------
// CEscape()
//    Escapes 'src' using C-style escape sequences and returns the resulting
//    string.
//
//    Escaped chars: \n, \r, \t, ", ', \, and !isprint().
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string CEscape(const string& src);

// ----------------------------------------------------------------------
// CEscapeAndAppend()
//    Escapes 'src' using C-style escape sequences, and appends the escaped
//    string to 'dest'.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, string* dest);

namespace strings {
// Like CEscape() but does not escape bytes with the upper bit set.
LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);

// Like CEscape() but uses hex (\x) escapes instead of octals.
LIBPROTOBUF_EXPORT string CHexEscape(const string& src);
}  // namespace strings

// ----------------------------------------------------------------------
// strto32()
// strtou32()
// strto64()
// strtou64()
//    Architecture-neutral plug compatible replacements for strtol() and
//    strtoul().  Longs have different lengths on ILP-32 and LP-64
//    platforms, so using these is safer, from the point of view of
//    overflow behavior, than using the standard libc functions.
349 // ---------------------------------------------------------------------- 350 LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, 351 int base); 352 LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, 353 int base); 354 355 inline int32 strto32(const char *nptr, char **endptr, int base) { 356 if (sizeof(int32) == sizeof(long)) 357 return strtol(nptr, endptr, base); 358 else 359 return strto32_adaptor(nptr, endptr, base); 360 } 361 362 inline uint32 strtou32(const char *nptr, char **endptr, int base) { 363 if (sizeof(uint32) == sizeof(unsigned long)) 364 return strtoul(nptr, endptr, base); 365 else 366 return strtou32_adaptor(nptr, endptr, base); 367 } 368 369 // For now, long long is 64-bit on all the platforms we care about, so these 370 // functions can simply pass the call to strto[u]ll. 371 inline int64 strto64(const char *nptr, char **endptr, int base) { 372 GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), 373 sizeof_int64_is_not_sizeof_long_long); 374 return strtoll(nptr, endptr, base); 375 } 376 377 inline uint64 strtou64(const char *nptr, char **endptr, int base) { 378 GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), 379 sizeof_uint64_is_not_sizeof_long_long); 380 return strtoull(nptr, endptr, base); 381 } 382 383 // ---------------------------------------------------------------------- 384 // safe_strtob() 385 // safe_strto32() 386 // safe_strtou32() 387 // safe_strto64() 388 // safe_strtou64() 389 // safe_strtof() 390 // safe_strtod() 391 // ---------------------------------------------------------------------- 392 LIBPROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value); 393 394 LIBPROTOBUF_EXPORT bool safe_strto32(const string& str, int32* value); 395 LIBPROTOBUF_EXPORT bool safe_strtou32(const string& str, uint32* value); 396 inline bool safe_strto32(const char* str, int32* value) { 397 return safe_strto32(string(str), value); 398 } 399 inline bool 
safe_strto32(StringPiece str, int32* value) { 400 return safe_strto32(str.ToString(), value); 401 } 402 inline bool safe_strtou32(const char* str, uint32* value) { 403 return safe_strtou32(string(str), value); 404 } 405 inline bool safe_strtou32(StringPiece str, uint32* value) { 406 return safe_strtou32(str.ToString(), value); 407 } 408 409 LIBPROTOBUF_EXPORT bool safe_strto64(const string& str, int64* value); 410 LIBPROTOBUF_EXPORT bool safe_strtou64(const string& str, uint64* value); 411 inline bool safe_strto64(const char* str, int64* value) { 412 return safe_strto64(string(str), value); 413 } 414 inline bool safe_strto64(StringPiece str, int64* value) { 415 return safe_strto64(str.ToString(), value); 416 } 417 inline bool safe_strtou64(const char* str, uint64* value) { 418 return safe_strtou64(string(str), value); 419 } 420 inline bool safe_strtou64(StringPiece str, uint64* value) { 421 return safe_strtou64(str.ToString(), value); 422 } 423 424 LIBPROTOBUF_EXPORT bool safe_strtof(const char* str, float* value); 425 LIBPROTOBUF_EXPORT bool safe_strtod(const char* str, double* value); 426 inline bool safe_strtof(const string& str, float* value) { 427 return safe_strtof(str.c_str(), value); 428 } 429 inline bool safe_strtod(const string& str, double* value) { 430 return safe_strtod(str.c_str(), value); 431 } 432 inline bool safe_strtof(StringPiece str, float* value) { 433 return safe_strtof(str.ToString(), value); 434 } 435 inline bool safe_strtod(StringPiece str, double* value) { 436 return safe_strtod(str.ToString(), value); 437 } 438 439 // ---------------------------------------------------------------------- 440 // FastIntToBuffer() 441 // FastHexToBuffer() 442 // FastHex64ToBuffer() 443 // FastHex32ToBuffer() 444 // FastTimeToBuffer() 445 // These are intended for speed. FastIntToBuffer() assumes the 446 // integer is non-negative. FastHexToBuffer() puts output in 447 // hex rather than decimal. FastTimeToBuffer() puts the output 448 // into RFC822 format. 
449 // 450 // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, 451 // padded to exactly 16 bytes (plus one byte for '\0') 452 // 453 // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, 454 // padded to exactly 8 bytes (plus one byte for '\0') 455 // 456 // All functions take the output buffer as an arg. 457 // They all return a pointer to the beginning of the output, 458 // which may not be the beginning of the input buffer. 459 // ---------------------------------------------------------------------- 460 461 // Suggested buffer size for FastToBuffer functions. Also works with 462 // DoubleToBuffer() and FloatToBuffer(). 463 static const int kFastToBufferSize = 32; 464 465 LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); 466 LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); 467 char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below 468 char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below 469 LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); 470 LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); 471 LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); 472 473 // at least 22 bytes long 474 inline char* FastIntToBuffer(int i, char* buffer) { 475 return (sizeof(i) == 4 ? 476 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 477 } 478 inline char* FastUIntToBuffer(unsigned int i, char* buffer) { 479 return (sizeof(i) == 4 ? 480 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 481 } 482 inline char* FastLongToBuffer(long i, char* buffer) { 483 return (sizeof(i) == 4 ? 484 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 485 } 486 inline char* FastULongToBuffer(unsigned long i, char* buffer) { 487 return (sizeof(i) == 4 ? 
488 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 489 } 490 491 // ---------------------------------------------------------------------- 492 // FastInt32ToBufferLeft() 493 // FastUInt32ToBufferLeft() 494 // FastInt64ToBufferLeft() 495 // FastUInt64ToBufferLeft() 496 // 497 // Like the Fast*ToBuffer() functions above, these are intended for speed. 498 // Unlike the Fast*ToBuffer() functions, however, these functions write 499 // their output to the beginning of the buffer (hence the name, as the 500 // output is left-aligned). The caller is responsible for ensuring that 501 // the buffer has enough space to hold the output. 502 // 503 // Returns a pointer to the end of the string (i.e. the null character 504 // terminating the string). 505 // ---------------------------------------------------------------------- 506 507 LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); 508 LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); 509 LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); 510 LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); 511 512 // Just define these in terms of the above. 513 inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { 514 FastUInt32ToBufferLeft(i, buffer); 515 return buffer; 516 } 517 inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { 518 FastUInt64ToBufferLeft(i, buffer); 519 return buffer; 520 } 521 522 inline string SimpleBtoa(bool value) { 523 return value ? "true" : "false"; 524 } 525 526 // ---------------------------------------------------------------------- 527 // SimpleItoa() 528 // Description: converts an integer to a string. 
//
//    Return value: string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleItoa(int i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i);
LIBPROTOBUF_EXPORT string SimpleItoa(long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i);
LIBPROTOBUF_EXPORT string SimpleItoa(long long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i);

// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    Description: converts a double or float to a string which, if
//    passed to NoLocaleStrtod(), will produce the exact same original double
//    (except in case of NaN; all NaNs are considered the same value).
//    We try to keep the string short but it's not guaranteed to be as
//    short as possible.
//
//    DoubleToBuffer() and FloatToBuffer() write the text to the given
//    buffer and return it.  The buffer must be at least
//    kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
//    bytes for floats.  kFastToBufferSize is also guaranteed to be large
//    enough to hold either.
//
//    Return value: string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleDtoa(double value);
LIBPROTOBUF_EXPORT string SimpleFtoa(float value);

LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);

// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
567 static const int kDoubleToBufferSize = 32; 568 static const int kFloatToBufferSize = 24; 569 570 namespace strings { 571 572 enum PadSpec { 573 NO_PAD = 1, 574 ZERO_PAD_2, 575 ZERO_PAD_3, 576 ZERO_PAD_4, 577 ZERO_PAD_5, 578 ZERO_PAD_6, 579 ZERO_PAD_7, 580 ZERO_PAD_8, 581 ZERO_PAD_9, 582 ZERO_PAD_10, 583 ZERO_PAD_11, 584 ZERO_PAD_12, 585 ZERO_PAD_13, 586 ZERO_PAD_14, 587 ZERO_PAD_15, 588 ZERO_PAD_16, 589 }; 590 591 struct Hex { 592 uint64 value; 593 enum PadSpec spec; 594 template <class Int> 595 explicit Hex(Int v, PadSpec s = NO_PAD) 596 : spec(s) { 597 // Prevent sign-extension by casting integers to 598 // their unsigned counterparts. 599 #ifdef LANG_CXX11 600 static_assert( 601 sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8, 602 "Unknown integer type"); 603 #endif 604 value = sizeof(v) == 1 ? static_cast<uint8>(v) 605 : sizeof(v) == 2 ? static_cast<uint16>(v) 606 : sizeof(v) == 4 ? static_cast<uint32>(v) 607 : static_cast<uint64>(v); 608 } 609 }; 610 611 struct LIBPROTOBUF_EXPORT AlphaNum { 612 const char *piece_data_; // move these to string_ref eventually 613 size_t piece_size_; // move these to string_ref eventually 614 615 char digits[kFastToBufferSize]; 616 617 // No bool ctor -- bools convert to an integral type. 618 // A bool ctor would also convert incoming pointers (bletch). 
619 620 AlphaNum(int32 i32) 621 : piece_data_(digits), 622 piece_size_(FastInt32ToBufferLeft(i32, digits) - &digits[0]) {} 623 AlphaNum(uint32 u32) 624 : piece_data_(digits), 625 piece_size_(FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {} 626 AlphaNum(int64 i64) 627 : piece_data_(digits), 628 piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} 629 AlphaNum(uint64 u64) 630 : piece_data_(digits), 631 piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} 632 633 AlphaNum(float f) 634 : piece_data_(digits), piece_size_(strlen(FloatToBuffer(f, digits))) {} 635 AlphaNum(double f) 636 : piece_data_(digits), piece_size_(strlen(DoubleToBuffer(f, digits))) {} 637 638 AlphaNum(Hex hex); 639 640 AlphaNum(const char* c_str) 641 : piece_data_(c_str), piece_size_(strlen(c_str)) {} 642 // TODO: Add a string_ref constructor, eventually 643 // AlphaNum(const StringPiece &pc) : piece(pc) {} 644 645 AlphaNum(const string& str) 646 : piece_data_(str.data()), piece_size_(str.size()) {} 647 648 AlphaNum(StringPiece str) 649 : piece_data_(str.data()), piece_size_(str.size()) {} 650 651 AlphaNum(internal::StringPiecePod str) 652 : piece_data_(str.data()), piece_size_(str.size()) {} 653 654 size_t size() const { return piece_size_; } 655 const char *data() const { return piece_data_; } 656 657 private: 658 // Use ":" not ':' 659 AlphaNum(char c); // NOLINT(runtime/explicit) 660 661 // Disallow copy and assign. 662 AlphaNum(const AlphaNum&); 663 void operator=(const AlphaNum&); 664 }; 665 666 } // namespace strings 667 668 using strings::AlphaNum; 669 670 // ---------------------------------------------------------------------- 671 // StrCat() 672 // This merges the given strings or numbers, with no delimiter. This 673 // is designed to be the fastest possible way to construct a string out 674 // of a mix of raw C strings, strings, bool values, 675 // and numeric values. 676 // 677 // Don't use this for user-visible strings. 
//    The localization process
//    works poorly on strings built up out of fragments.
//
//    For clarity and performance, don't use StrCat when appending to a
//    string.  In particular, avoid using any of these (anti-)patterns:
//      str.append(StrCat(...)
//      str += StrCat(...)
//      str = StrCat(str, ...)
//    where the last is the worst, with the potential to change a loop
//    from a linear time operation with O(1) dynamic allocations into a
//    quadratic time operation with O(n) dynamic allocations.  StrAppend
//    is a better choice than any of the above, subject to the restriction
//    of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
//    be a reference into str.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d,
                                 const AlphaNum& e);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d,
                                 const AlphaNum& e, const AlphaNum& f);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d,
                                 const AlphaNum& e, const AlphaNum& f,
                                 const AlphaNum& g);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d,
                                 const AlphaNum& e, const AlphaNum& f,
                                 const AlphaNum& g, const AlphaNum& h);
LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                                 const AlphaNum& c, const AlphaNum& d,
                                 const AlphaNum& e, const AlphaNum& f,
                                 const AlphaNum& g, const AlphaNum& h,
                                 const AlphaNum& i);

// Single-argument form: copies the argument's bytes into a new string.
inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); }

// ----------------------------------------------------------------------
// StrAppend()
//    Same as above, but adds the output to the given string.
//    WARNING: For speed, StrAppend does not try to check each of its input
//    arguments to be sure that they are not a subset of the string being
//    appended to.  That is, while this will work:
//
//    string s = "foo";
//    s += s;
//
//    This will not (necessarily) work:
//
//    string s = "foo";
//    StrAppend(&s, s);
//
//    Note: while StrCat supports appending up to 9 arguments, StrAppend
//    is currently limited to 4.  That's rarely an issue except when
//    automatically transforming StrCat to StrAppend, and can easily be
//    worked around as consecutive calls to StrAppend are quite efficient.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a);
LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
                                  const AlphaNum& b);
LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
                                  const AlphaNum& b, const AlphaNum& c);
LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
                                  const AlphaNum& b, const AlphaNum& c,
                                  const AlphaNum& d);

// ----------------------------------------------------------------------
// Join()
//    These methods concatenate a range of components into a C++ string, using
//    the C-string "delim" as a separator between components.
754 // ---------------------------------------------------------------------- 755 template <typename Iterator> 756 void Join(Iterator start, Iterator end, 757 const char* delim, string* result) { 758 for (Iterator it = start; it != end; ++it) { 759 if (it != start) { 760 result->append(delim); 761 } 762 StrAppend(result, *it); 763 } 764 } 765 766 template <typename Range> 767 string Join(const Range& components, 768 const char* delim) { 769 string result; 770 Join(components.begin(), components.end(), delim, &result); 771 return result; 772 } 773 774 // ---------------------------------------------------------------------- 775 // ToHex() 776 // Return a lower-case hex string representation of the given integer. 777 // ---------------------------------------------------------------------- 778 LIBPROTOBUF_EXPORT string ToHex(uint64 num); 779 780 // ---------------------------------------------------------------------- 781 // GlobalReplaceSubstring() 782 // Replaces all instances of a substring in a string. Does nothing 783 // if 'substring' is empty. Returns the number of replacements. 784 // 785 // NOTE: The string pieces must not overlap s. 786 // ---------------------------------------------------------------------- 787 LIBPROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring, 788 const string& replacement, 789 string* s); 790 791 // ---------------------------------------------------------------------- 792 // Base64Unescape() 793 // Converts "src" which is encoded in Base64 to its binary equivalent and 794 // writes it to "dest". If src contains invalid characters, dest is cleared 795 // and the function returns false. Returns true on success. 
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest);

// ----------------------------------------------------------------------
// WebSafeBase64Unescape()
//    This is a variation of Base64Unescape which uses '-' instead of '+', and
//    '_' instead of '/'.  src is not null terminated; pass its length in
//    slen.  It is recommended that slen < szdest, but szdest is honored
//    either way.
//    RETURNS the length of dest, or -1 if src contains invalid chars.
//
//    The variation that stores into a string clears the string first, and
//    returns false (with dest empty) if src contains invalid chars; for
//    this version src and dest must be different strings.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen,
                                             char* dest, int szdest);
LIBPROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, string* dest);

// Return the length to use for the output buffer given to the base64 escape
// routines.  Make sure to use the same value for do_padding in both calls
// (this one and the escape routine).
// This function may return incorrect results if given input_len values that
// are extremely high, which should happen rarely.
LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len,
                                                 bool do_padding);
// Use this version when calling Base64Escape without a do_padding arg.
LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len);

// ----------------------------------------------------------------------
// Base64Escape()
// WebSafeBase64Escape()
//    Encode "src" to "dest" using base64 encoding.
//    src is not null terminated; its length is passed explicitly.
//    'dest' should have at least CalculateBase64EscapedLen() length.
//    RETURNS the length of dest.
830 // The WebSafe variation use '-' instead of '+' and '_' instead of '/' 831 // so that we can place the out in the URL or cookies without having 832 // to escape them. It also has an extra parameter "do_padding", 833 // which when set to false will prevent padding with "=". 834 // ---------------------------------------------------------------------- 835 LIBPROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, 836 char* dest, int szdest); 837 LIBPROTOBUF_EXPORT int WebSafeBase64Escape( 838 const unsigned char* src, int slen, char* dest, 839 int szdest, bool do_padding); 840 // Encode src into dest with padding. 841 LIBPROTOBUF_EXPORT void Base64Escape(StringPiece src, string* dest); 842 // Encode src into dest web-safely without padding. 843 LIBPROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, string* dest); 844 // Encode src into dest web-safely with padding. 845 LIBPROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, 846 string* dest); 847 848 LIBPROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, 849 string* dest, bool do_padding); 850 LIBPROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, 851 string* dest, bool do_padding); 852 853 inline bool IsValidCodePoint(uint32 code_point) { 854 return code_point < 0xD800 || 855 (code_point >= 0xE000 && code_point <= 0x10FFFF); 856 } 857 858 static const int UTFmax = 4; 859 // ---------------------------------------------------------------------- 860 // EncodeAsUTF8Char() 861 // Helper to append a Unicode code point to a string as UTF8, without bringing 862 // in any external dependencies. The output buffer must be as least 4 bytes 863 // large. 
// ----------------------------------------------------------------------
// NOTE(review): the meaning of the int return value is not documented here;
// presumably the number of bytes written to "output" -- confirm against the
// definition.
LIBPROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output);

// ----------------------------------------------------------------------
// UTF8FirstLetterNumBytes()
//   Length of the first UTF-8 character.
//   NOTE(review): presumably measured in bytes, with "len" giving the number
//   of bytes available in "src" -- confirm against the definition.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len);

}  // namespace protobuf
}  // namespace google

#endif  // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__