1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 // from google3/strings/strutil.h 32 33 #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34 #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 36 #include <stdlib.h> 37 #include <vector> 38 #include <google/protobuf/stubs/common.h> 39 40 namespace google { 41 namespace protobuf { 42 43 #ifdef _MSC_VER 44 #define strtoll _strtoi64 45 #define strtoull _strtoui64 46 #elif defined(__DECCXX) && defined(__osf__) 47 // HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 48 #define strtoll strtol 49 #define strtoull strtoul 50 #endif 51 52 // ---------------------------------------------------------------------- 53 // ascii_isalnum() 54 // Check if an ASCII character is alphanumeric. We can't use ctype's 55 // isalnum() because it is affected by locale. This function is applied 56 // to identifiers in the protocol buffer language, not to natural-language 57 // strings, so locale should not be taken into account. 58 // ascii_isdigit() 59 // Like above, but only accepts digits. 60 // ---------------------------------------------------------------------- 61 62 inline bool ascii_isalnum(char c) { 63 return ('a' <= c && c <= 'z') || 64 ('A' <= c && c <= 'Z') || 65 ('0' <= c && c <= '9'); 66 } 67 68 inline bool ascii_isdigit(char c) { 69 return ('0' <= c && c <= '9'); 70 } 71 72 // ---------------------------------------------------------------------- 73 // HasPrefixString() 74 // Check if a string begins with a given prefix. 75 // StripPrefixString() 76 // Given a string and a putative prefix, returns the string minus the 77 // prefix string if the prefix matches, otherwise the original 78 // string. 
79 // ---------------------------------------------------------------------- 80 inline bool HasPrefixString(const string& str, 81 const string& prefix) { 82 return str.size() >= prefix.size() && 83 str.compare(0, prefix.size(), prefix) == 0; 84 } 85 86 inline string StripPrefixString(const string& str, const string& prefix) { 87 if (HasPrefixString(str, prefix)) { 88 return str.substr(prefix.size()); 89 } else { 90 return str; 91 } 92 } 93 94 // ---------------------------------------------------------------------- 95 // HasSuffixString() 96 // Return true if str ends in suffix. 97 // StripSuffixString() 98 // Given a string and a putative suffix, returns the string minus the 99 // suffix string if the suffix matches, otherwise the original 100 // string. 101 // ---------------------------------------------------------------------- 102 inline bool HasSuffixString(const string& str, 103 const string& suffix) { 104 return str.size() >= suffix.size() && 105 str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; 106 } 107 108 inline string StripSuffixString(const string& str, const string& suffix) { 109 if (HasSuffixString(str, suffix)) { 110 return str.substr(0, str.size() - suffix.size()); 111 } else { 112 return str; 113 } 114 } 115 116 // ---------------------------------------------------------------------- 117 // StripString 118 // Replaces any occurrence of the character 'remove' (or the characters 119 // in 'remove') with the character 'replacewith'. 120 // Good for keeping html characters or protocol characters (\t) out 121 // of places where they might cause a problem. 122 // ---------------------------------------------------------------------- 123 LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove, 124 char replacewith); 125 126 // ---------------------------------------------------------------------- 127 // LowerString() 128 // UpperString() 129 // ToUpper() 130 // Convert the characters in "s" to lowercase or uppercase. 
ASCII-only: 131 // these functions intentionally ignore locale because they are applied to 132 // identifiers used in the Protocol Buffer language, not to natural-language 133 // strings. 134 // ---------------------------------------------------------------------- 135 136 inline void LowerString(string * s) { 137 string::iterator end = s->end(); 138 for (string::iterator i = s->begin(); i != end; ++i) { 139 // tolower() changes based on locale. We don't want this! 140 if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A'; 141 } 142 } 143 144 inline void UpperString(string * s) { 145 string::iterator end = s->end(); 146 for (string::iterator i = s->begin(); i != end; ++i) { 147 // toupper() changes based on locale. We don't want this! 148 if ('a' <= *i && *i <= 'z') *i += 'A' - 'a'; 149 } 150 } 151 152 inline string ToUpper(const string& s) { 153 string out = s; 154 UpperString(&out); 155 return out; 156 } 157 158 // ---------------------------------------------------------------------- 159 // StringReplace() 160 // Give me a string and two patterns "old" and "new", and I replace 161 // the first instance of "old" in the string with "new", if it 162 // exists. RETURN a new string, regardless of whether the replacement 163 // happened or not. 164 // ---------------------------------------------------------------------- 165 166 LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, 167 const string& newsub, bool replace_all); 168 169 // ---------------------------------------------------------------------- 170 // SplitStringUsing() 171 // Split a string using a character delimiter. Append the components 172 // to 'result'. If there are consecutive delimiters, this function skips 173 // over all of them. 
174 // ---------------------------------------------------------------------- 175 LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, 176 vector<string>* res); 177 178 // Split a string using one or more byte delimiters, presented 179 // as a nul-terminated c string. Append the components to 'result'. 180 // If there are consecutive delimiters, this function will return 181 // corresponding empty strings. If you want to drop the empty 182 // strings, try SplitStringUsing(). 183 // 184 // If "full" is the empty string, yields an empty string as the only value. 185 // ---------------------------------------------------------------------- 186 LIBPROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full, 187 const char* delim, 188 vector<string>* result); 189 190 // ---------------------------------------------------------------------- 191 // Split() 192 // Split a string using a character delimiter. 193 // ---------------------------------------------------------------------- 194 inline vector<string> Split( 195 const string& full, const char* delim, bool skip_empty = true) { 196 vector<string> result; 197 if (skip_empty) { 198 SplitStringUsing(full, delim, &result); 199 } else { 200 SplitStringAllowEmpty(full, delim, &result); 201 } 202 return result; 203 } 204 205 // ---------------------------------------------------------------------- 206 // JoinStrings() 207 // These methods concatenate a vector of strings into a C++ string, using 208 // the C-string "delim" as a separator between components. There are two 209 // flavors of the function, one flavor returns the concatenated string, 210 // another takes a pointer to the target string. In the latter case the 211 // target string is cleared and overwritten. 
212 // ---------------------------------------------------------------------- 213 LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components, 214 const char* delim, string* result); 215 216 inline string JoinStrings(const vector<string>& components, 217 const char* delim) { 218 string result; 219 JoinStrings(components, delim, &result); 220 return result; 221 } 222 223 // ---------------------------------------------------------------------- 224 // UnescapeCEscapeSequences() 225 // Copies "source" to "dest", rewriting C-style escape sequences 226 // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII 227 // equivalents. "dest" must be sufficiently large to hold all 228 // the characters in the rewritten string (i.e. at least as large 229 // as strlen(source) + 1 should be safe, since the replacements 230 // are always shorter than the original escaped sequences). It's 231 // safe for source and dest to be the same. RETURNS the length 232 // of dest. 233 // 234 // It allows hex sequences \xhh, or generally \xhhhhh with an 235 // arbitrary number of hex digits, but all of them together must 236 // specify a value of a single byte (e.g. \x0045 is equivalent 237 // to \x45, and \x1234 is erroneous). 238 // 239 // It also allows escape sequences of the form \uhhhh (exactly four 240 // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight 241 // hex digits, upper or lower case) to specify a Unicode code 242 // point. The dest array will contain the UTF8-encoded version of 243 // that code-point (e.g., if source contains \u2019, then dest will 244 // contain the three bytes 0xE2, 0x80, and 0x99). 245 // 246 // Errors: In the first form of the call, errors are reported with 247 // LOG(ERROR). The same is true for the second form of the call if 248 // the pointer to the string vector is NULL; otherwise, error 249 // messages are stored in the vector. 
//    In either case, the effect on the dest array is not defined, but
//    the rest of the source will be processed.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
                                                vector<string> *errors);

// ----------------------------------------------------------------------
// UnescapeCEscapeString()
//    This does the same thing as UnescapeCEscapeSequences, but creates
//    a new string. The caller does not need to worry about allocating
//    a dest buffer. This should be used for non performance critical
//    tasks such as printing debug messages. It is safe for src and dest
//    to be the same.
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is NULL, it reports the errors with LOG().
//
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
                                             vector<string> *errors);
LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);

// ----------------------------------------------------------------------
// CEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters using
//    C-style escape sequences. This is very useful for preparing query
//    flags. 'src' and 'dest' should not overlap.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int CEscapeString(const char* src, int src_len,
                                     char* dest, int dest_len);

// ----------------------------------------------------------------------
// CEscape()
//    More convenient form of CEscapeString: returns result as a "string".
//    This version is slower than CEscapeString() because it does more
//    allocation.  However, it is much more convenient to use in
//    non-speed-critical code like logging messages etc.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string CEscape(const string& src);

namespace strings {
// Like CEscape() but does not escape bytes with the upper bit set.
LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);

// Like CEscape() but uses hex (\x) escapes instead of octals.
LIBPROTOBUF_EXPORT string CHexEscape(const string& src);
}  // namespace strings

// ----------------------------------------------------------------------
// strto32()
// strtou32()
// strto64()
// strtou64()
//    Architecture-neutral plug compatible replacements for strtol() and
//    strtoul().  Long's have different lengths on ILP-32 and LP-64
//    platforms, so using these is safer, from the point of view of
//    overflow behavior, than using the standard libc functions.
317 // ---------------------------------------------------------------------- 318 LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, 319 int base); 320 LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, 321 int base); 322 323 inline int32 strto32(const char *nptr, char **endptr, int base) { 324 if (sizeof(int32) == sizeof(long)) 325 return strtol(nptr, endptr, base); 326 else 327 return strto32_adaptor(nptr, endptr, base); 328 } 329 330 inline uint32 strtou32(const char *nptr, char **endptr, int base) { 331 if (sizeof(uint32) == sizeof(unsigned long)) 332 return strtoul(nptr, endptr, base); 333 else 334 return strtou32_adaptor(nptr, endptr, base); 335 } 336 337 // For now, long long is 64-bit on all the platforms we care about, so these 338 // functions can simply pass the call to strto[u]ll. 339 inline int64 strto64(const char *nptr, char **endptr, int base) { 340 GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), 341 sizeof_int64_is_not_sizeof_long_long); 342 return strtoll(nptr, endptr, base); 343 } 344 345 inline uint64 strtou64(const char *nptr, char **endptr, int base) { 346 GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), 347 sizeof_uint64_is_not_sizeof_long_long); 348 return strtoull(nptr, endptr, base); 349 } 350 351 // ---------------------------------------------------------------------- 352 // safe_strto32() 353 // ---------------------------------------------------------------------- 354 LIBPROTOBUF_EXPORT bool safe_int(string text, int32* value_p); 355 356 inline bool safe_strto32(string text, int32* value) { 357 return safe_int(text, value); 358 } 359 360 // ---------------------------------------------------------------------- 361 // FastIntToBuffer() 362 // FastHexToBuffer() 363 // FastHex64ToBuffer() 364 // FastHex32ToBuffer() 365 // FastTimeToBuffer() 366 // These are intended for speed. FastIntToBuffer() assumes the 367 // integer is non-negative. 
FastHexToBuffer() puts output in 368 // hex rather than decimal. FastTimeToBuffer() puts the output 369 // into RFC822 format. 370 // 371 // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, 372 // padded to exactly 16 bytes (plus one byte for '\0') 373 // 374 // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, 375 // padded to exactly 8 bytes (plus one byte for '\0') 376 // 377 // All functions take the output buffer as an arg. 378 // They all return a pointer to the beginning of the output, 379 // which may not be the beginning of the input buffer. 380 // ---------------------------------------------------------------------- 381 382 // Suggested buffer size for FastToBuffer functions. Also works with 383 // DoubleToBuffer() and FloatToBuffer(). 384 static const int kFastToBufferSize = 32; 385 386 LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); 387 LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); 388 char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below 389 char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below 390 LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); 391 LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); 392 LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); 393 394 // at least 22 bytes long 395 inline char* FastIntToBuffer(int i, char* buffer) { 396 return (sizeof(i) == 4 ? 397 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 398 } 399 inline char* FastUIntToBuffer(unsigned int i, char* buffer) { 400 return (sizeof(i) == 4 ? 401 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 402 } 403 inline char* FastLongToBuffer(long i, char* buffer) { 404 return (sizeof(i) == 4 ? 405 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 406 } 407 inline char* FastULongToBuffer(unsigned long i, char* buffer) { 408 return (sizeof(i) == 4 ? 
409 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 410 } 411 412 // ---------------------------------------------------------------------- 413 // FastInt32ToBufferLeft() 414 // FastUInt32ToBufferLeft() 415 // FastInt64ToBufferLeft() 416 // FastUInt64ToBufferLeft() 417 // 418 // Like the Fast*ToBuffer() functions above, these are intended for speed. 419 // Unlike the Fast*ToBuffer() functions, however, these functions write 420 // their output to the beginning of the buffer (hence the name, as the 421 // output is left-aligned). The caller is responsible for ensuring that 422 // the buffer has enough space to hold the output. 423 // 424 // Returns a pointer to the end of the string (i.e. the null character 425 // terminating the string). 426 // ---------------------------------------------------------------------- 427 428 LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); 429 LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); 430 LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); 431 LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); 432 433 // Just define these in terms of the above. 434 inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { 435 FastUInt32ToBufferLeft(i, buffer); 436 return buffer; 437 } 438 inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { 439 FastUInt64ToBufferLeft(i, buffer); 440 return buffer; 441 } 442 443 // ---------------------------------------------------------------------- 444 // SimpleItoa() 445 // Description: converts an integer to a string. 
//
//    Return value: string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleItoa(int i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i);
LIBPROTOBUF_EXPORT string SimpleItoa(long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i);
LIBPROTOBUF_EXPORT string SimpleItoa(long long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i);

// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    Description: converts a double or float to a string which, if
//    passed to NoLocaleStrtod(), will produce the exact same original double
//    (except in case of NaN; all NaNs are considered the same value).
//    We try to keep the string short but it's not guaranteed to be as
//    short as possible.
//
//    DoubleToBuffer() and FloatToBuffer() write the text to the given
//    buffer and return it.  The buffer must be at least
//    kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
//    bytes for floats.  kFastToBufferSize is also guaranteed to be large
//    enough to hold either.
//
//    Return value: string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleDtoa(double value);
LIBPROTOBUF_EXPORT string SimpleFtoa(float value);

LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);

// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
484 static const int kDoubleToBufferSize = 32; 485 static const int kFloatToBufferSize = 24; 486 487 // ---------------------------------------------------------------------- 488 // ToString() are internal help methods used in StrCat() and Join() 489 // ---------------------------------------------------------------------- 490 namespace internal { 491 inline string ToString(int i) { 492 return SimpleItoa(i); 493 } 494 495 inline string ToString(string a) { 496 return a; 497 } 498 } // namespace internal 499 500 // ---------------------------------------------------------------------- 501 // StrCat() 502 // These methods join some strings together. 503 // ---------------------------------------------------------------------- 504 template <typename T1, typename T2, typename T3, typename T4, typename T5> 505 string StrCat( 506 const T1& a, const T2& b, const T3& c, const T4& d, const T5& e) { 507 return internal::ToString(a) + internal::ToString(b) + 508 internal::ToString(c) + internal::ToString(d) + internal::ToString(e); 509 } 510 511 template <typename T1, typename T2, typename T3, typename T4> 512 string StrCat( 513 const T1& a, const T2& b, const T3& c, const T4& d) { 514 return internal::ToString(a) + internal::ToString(b) + 515 internal::ToString(c) + internal::ToString(d); 516 } 517 518 template <typename T1, typename T2, typename T3> 519 string StrCat(const T1& a, const T2& b, const T3& c) { 520 return internal::ToString(a) + internal::ToString(b) + 521 internal::ToString(c); 522 } 523 524 template <typename T1, typename T2> 525 string StrCat(const T1& a, const T2& b) { 526 return internal::ToString(a) + internal::ToString(b); 527 } 528 529 // ---------------------------------------------------------------------- 530 // Join() 531 // These methods concatenate a range of components into a C++ string, using 532 // the C-string "delim" as a separator between components. 
533 // ---------------------------------------------------------------------- 534 template <typename Iterator> 535 void Join(Iterator start, Iterator end, 536 const char* delim, string* result) { 537 for (Iterator it = start; it != end; ++it) { 538 if (it != start) { 539 result->append(delim); 540 } 541 result->append(internal::ToString(*it)); 542 } 543 } 544 545 template <typename Range> 546 string Join(const Range& components, 547 const char* delim) { 548 string result; 549 Join(components.begin(), components.end(), delim, &result); 550 return result; 551 } 552 553 // ---------------------------------------------------------------------- 554 // ToHex() 555 // Return a lower-case hex string representation of the given integer. 556 // ---------------------------------------------------------------------- 557 LIBPROTOBUF_EXPORT string ToHex(uint64 num); 558 559 } // namespace protobuf 560 } // namespace google 561 562 #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 563