1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 // from google3/strings/strutil.h 32 33 #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34 #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 36 #include <stdlib.h> 37 #include <vector> 38 #include <google/protobuf/stubs/common.h> 39 40 namespace google { 41 namespace protobuf { 42 43 #ifdef _MSC_VER 44 #define strtoll _strtoi64 45 #define strtoull _strtoui64 46 #elif defined(__DECCXX) && defined(__osf__) 47 // HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 48 #define strtoll strtol 49 #define strtoull strtoul 50 #endif 51 52 // ---------------------------------------------------------------------- 53 // ascii_isalnum() 54 // Check if an ASCII character is alphanumeric. We can't use ctype's 55 // isalnum() because it is affected by locale. This function is applied 56 // to identifiers in the protocol buffer language, not to natural-language 57 // strings, so locale should not be taken into account. 58 // ascii_isdigit() 59 // Like above, but only accepts digits. 60 // ---------------------------------------------------------------------- 61 62 inline bool ascii_isalnum(char c) { 63 return ('a' <= c && c <= 'z') || 64 ('A' <= c && c <= 'Z') || 65 ('0' <= c && c <= '9'); 66 } 67 68 inline bool ascii_isdigit(char c) { 69 return ('0' <= c && c <= '9'); 70 } 71 72 // ---------------------------------------------------------------------- 73 // HasPrefixString() 74 // Check if a string begins with a given prefix. 75 // StripPrefixString() 76 // Given a string and a putative prefix, returns the string minus the 77 // prefix string if the prefix matches, otherwise the original 78 // string. 
// ----------------------------------------------------------------------
inline bool HasPrefixString(const string& str,
                            const string& prefix) {
  const string::size_type prefix_len = prefix.size();
  // A string shorter than the prefix can never start with it.
  if (str.size() < prefix_len) return false;
  return str.compare(0, prefix_len, prefix) == 0;
}

inline string StripPrefixString(const string& str, const string& prefix) {
  return HasPrefixString(str, prefix) ? str.substr(prefix.size()) : str;
}

// ----------------------------------------------------------------------
// HasSuffixString()
//    Returns true if 'str' ends with 'suffix'.
// StripSuffixString()
//    Returns 'str' with 'suffix' removed from its end when 'str' ends
//    with 'suffix'; otherwise returns 'str' unchanged.
// ----------------------------------------------------------------------
inline bool HasSuffixString(const string& str,
                            const string& suffix) {
  const string::size_type suffix_len = suffix.size();
  // A string shorter than the suffix can never end with it.
  if (str.size() < suffix_len) return false;
  return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
}

inline string StripSuffixString(const string& str, const string& suffix) {
  return HasSuffixString(str, suffix)
             ? str.substr(0, str.size() - suffix.size())
             : str;
}

// ----------------------------------------------------------------------
// StripString
//    Overwrites every occurrence of the character 'remove' (or of any of
//    the characters in 'remove') with the character 'replacewith'.
//    Handy for keeping html characters or protocol characters (\t) out
//    of places where they might cause a problem.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove,
                                    char replacewith);

// ----------------------------------------------------------------------
// LowerString()
// UpperString()
//    Convert the characters in "s" to lowercase or uppercase.
//    ASCII-only: these functions intentionally ignore locale because they
//    are applied to identifiers used in the Protocol Buffer language, not
//    to natural-language strings.
// ----------------------------------------------------------------------

inline void LowerString(string * s) {
  for (string::size_type pos = 0; pos < s->size(); ++pos) {
    char& ch = (*s)[pos];
    // tolower() is locale-sensitive, so shift the ASCII range by hand.
    if (ch >= 'A' && ch <= 'Z') ch += 'a' - 'A';
  }
}

inline void UpperString(string * s) {
  for (string::size_type pos = 0; pos < s->size(); ++pos) {
    char& ch = (*s)[pos];
    // toupper() is locale-sensitive, so shift the ASCII range by hand.
    if (ch >= 'a' && ch <= 'z') ch -= 'a' - 'A';
  }
}

// ----------------------------------------------------------------------
// StringReplace()
//    Takes a string 's' and two patterns, 'oldsub' and 'newsub', and
//    replaces the first instance of 'oldsub' in 's' with 'newsub', if it
//    exists.  A new string is RETURNED regardless of whether a
//    replacement happened or not.
//    NOTE(review): 'replace_all' presumably extends the replacement to
//    every instance of 'oldsub' -- confirm against the implementation.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub,
                                        const string& newsub, bool replace_all);

// ----------------------------------------------------------------------
// SplitStringUsing()
//    Split a string using a character delimiter. Append the components
//    to 'result'.  If there are consecutive delimiters, this function skips
//    over all of them.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim,
                                         vector<string>* res);

// ----------------------------------------------------------------------
// JoinStrings()
//    Concatenates the strings in 'components' into a single C++ string,
//    placing the C-string 'delim' between consecutive components.  Two
//    flavors are provided: one returns the concatenated string by value,
//    the other writes into a caller-supplied target string, which is
//    cleared and overwritten.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components,
                                    const char* delim, string* result);

inline string JoinStrings(const vector<string>& components,
                          const char* delim) {
  // Delegate to the out-parameter flavor and hand back its output.
  string joined;
  JoinStrings(components, delim, &joined);
  return joined;
}

// ----------------------------------------------------------------------
// UnescapeCEscapeSequences()
//    Copies "source" to "dest", rewriting C-style escape sequences
//    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
//    equivalents.  "dest" must be sufficiently large to hold all
//    the characters in the rewritten string (i.e. at least as large
//    as strlen(source) + 1 should be safe, since the replacements
//    are always shorter than the original escaped sequences).  It's
//    safe for source and dest to be the same.  RETURNS the length
//    of dest.
//
//    It allows hex sequences \xhh, or generally \xhhhhh with an
//    arbitrary number of hex digits, but all of them together must
//    specify a value of a single byte (e.g. \x0045 is equivalent
//    to \x45, and \x1234 is erroneous).
//
//    Escape sequences of the form \uhhhh (exactly four hex digits, upper
//    or lower case) or \Uhhhhhhhh (exactly eight hex digits, upper or
//    lower case) are also accepted and specify a Unicode code point.
//    The dest array will contain the UTF8-encoded version of that
//    code-point (e.g., if source contains \u2019, then dest will
//    contain the three bytes 0xE2, 0x80, and 0x99).  For the inverse
//    transformation, use UniLib::UTF8EscapeString
//    (util/utf8/unilib.h), not CEscapeString.
//
//    Errors: In the first form of the call, errors are reported with
//    LOG(ERROR).  The same is true for the second form of the call if
//    the pointer to the string vector is NULL; otherwise, error
//    messages are stored in the vector.  In either case, the effect on
//    the dest array is not defined, but the rest of the source will be
//    processed.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
                                                vector<string> *errors);

// ----------------------------------------------------------------------
// UnescapeCEscapeString()
//    This does the same thing as UnescapeCEscapeSequences, but creates
//    a new string. The caller does not need to worry about allocating
//    a dest buffer. This should be used for non performance critical
//    tasks such as printing debug messages. It is safe for src and dest
//    to be the same.
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is NULL, it reports the errors with LOG().
//
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
                                             vector<string> *errors);
LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);

// ----------------------------------------------------------------------
// CEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters with C-style
//    escape sequences.  This is very useful for preparing query flags.
//    'src' and 'dest' should not overlap.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int CEscapeString(const char* src, int src_len,
                                     char* dest, int dest_len);

// ----------------------------------------------------------------------
// CEscape()
//    Convenience form of CEscapeString that returns its result as a
//    "string".  It is slower than CEscapeString() because it does more
//    allocation, but it is much more convenient in non-speed-critical
//    code such as logging messages.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string CEscape(const string& src);

namespace strings {
// Like CEscape() but does not escape bytes with the upper bit set.
LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);

// Like CEscape() but uses hex (\x) escapes instead of octals.
LIBPROTOBUF_EXPORT string CHexEscape(const string& src);
}  // namespace strings

// ----------------------------------------------------------------------
// strto32()
// strtou32()
// strto64()
// strtou64()
//    Architecture-neutral, plug-compatible replacements for strtol() and
//    strtoul().  A long has a different length on ILP-32 and LP-64
//    platforms, so, from the point of view of overflow behavior, these
//    are safer than the standard libc functions.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr,
                                         int base);
LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr,
                                           int base);

inline int32 strto32(const char *nptr, char **endptr, int base) {
  // Only when long is not int32-sized does the adaptor have to step in;
  // otherwise strtol() already has the right width.
  if (sizeof(int32) != sizeof(long)) {
    return strto32_adaptor(nptr, endptr, base);
  }
  return strtol(nptr, endptr, base);
}

inline uint32 strtou32(const char *nptr, char **endptr, int base) {
  // Same dispatch as strto32(), for the unsigned flavor.
  if (sizeof(uint32) != sizeof(unsigned long)) {
    return strtou32_adaptor(nptr, endptr, base);
  }
  return strtoul(nptr, endptr, base);
}

// For now, long long is 64-bit on all the platforms we care about, so these
// functions can simply pass the call to strto[u]ll.
inline int64 strto64(const char *nptr, char **endptr, int base) {
  // Forwarding to strtoll() is only valid while int64 and long long have
  // the same size; fail the build otherwise.
  GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long),
                        sizeof_int64_is_not_sizeof_long_long);
  const long long parsed = strtoll(nptr, endptr, base);
  return parsed;
}

inline uint64 strtou64(const char *nptr, char **endptr, int base) {
  // Forwarding to strtoull() is only valid while uint64 and unsigned long
  // long have the same size; fail the build otherwise.
  GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long),
                        sizeof_uint64_is_not_sizeof_long_long);
  const unsigned long long parsed = strtoull(nptr, endptr, base);
  return parsed;
}

// ----------------------------------------------------------------------
// FastIntToBuffer()
// FastHexToBuffer()
// FastHex64ToBuffer()
// FastHex32ToBuffer()
// FastTimeToBuffer()
//    These are intended for speed.  FastIntToBuffer() assumes the
//    integer is non-negative.  FastHexToBuffer() writes its output in
//    hex rather than decimal.  FastTimeToBuffer() writes its output
//    in RFC822 format.
//
//    FastHex64ToBuffer() writes a 64-bit unsigned value in hex-format,
//    padded to exactly 16 bytes (plus one byte for '\0')
//
//    FastHex32ToBuffer() writes a 32-bit unsigned value in hex-format,
//    padded to exactly 8 bytes (plus one byte for '\0')
//
//    Every function takes the output buffer as an argument and returns
//    a pointer to the beginning of the output, which may not be the
//    beginning of the input buffer.
// ----------------------------------------------------------------------

// Suggested buffer size for FastToBuffer functions.  Also works with
// DoubleToBuffer() and FloatToBuffer().
static const int kFastToBufferSize = 32;

LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer);
LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer);
char* FastUInt32ToBuffer(uint32 i, char* buffer);  // inline below
char* FastUInt64ToBuffer(uint64 i, char* buffer);  // inline below
LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer);
LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer);
LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer);

// The four wrappers below pick the 32-bit or 64-bit routine according to
// the size of the argument's type on the current platform.
// 'buffer' should be at least 22 bytes long.
inline char* FastIntToBuffer(int i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastInt32ToBuffer(i, buffer);
  }
  return FastInt64ToBuffer(i, buffer);
}
inline char* FastUIntToBuffer(unsigned int i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastUInt32ToBuffer(i, buffer);
  }
  return FastUInt64ToBuffer(i, buffer);
}
inline char* FastLongToBuffer(long i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastInt32ToBuffer(i, buffer);
  }
  return FastInt64ToBuffer(i, buffer);
}
inline char* FastULongToBuffer(unsigned long i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastUInt32ToBuffer(i, buffer);
  }
  return FastUInt64ToBuffer(i, buffer);
}

// ----------------------------------------------------------------------
// FastInt32ToBufferLeft()
// FastUInt32ToBufferLeft()
// FastInt64ToBufferLeft()
// FastUInt64ToBufferLeft()
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer (hence the name, as the
// output is left-aligned).  The caller is responsible for ensuring that
// the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string).
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer);
LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer);
LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer);
LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer);

// The unsigned Fast*ToBuffer() variants are thin wrappers over the
// *ToBufferLeft() routines declared just above: the digits are written
// through 'buffer', the returned end pointer is ignored, and 'buffer'
// itself is handed back.
inline char* FastUInt32ToBuffer(uint32 i, char* buffer) {
  char* const start = buffer;
  FastUInt32ToBufferLeft(i, start);
  return start;
}
inline char* FastUInt64ToBuffer(uint64 i, char* buffer) {
  char* const start = buffer;
  FastUInt64ToBufferLeft(i, start);
  return start;
}

// ----------------------------------------------------------------------
// SimpleItoa()
//    Description: converts an integer to a string.  An overload exists
//    for each of int, long and long long, signed and unsigned.
//
//    Return value: string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleItoa(int i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i);
LIBPROTOBUF_EXPORT string SimpleItoa(long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i);
LIBPROTOBUF_EXPORT string SimpleItoa(long long i);
LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i);

// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    Description: converts a double or float to a string which, if
//    passed to NoLocaleStrtod(), will produce the exact same original double
//    (except in case of NaN; all NaNs are considered the same value).
//    We try to keep the string short but it's not guaranteed to be as
//    short as possible.
//
//    DoubleToBuffer() and FloatToBuffer() write the text to the given
//    buffer and return it.
The buffer must be at least 428 // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize 429 // bytes for floats. kFastToBufferSize is also guaranteed to be large 430 // enough to hold either. 431 // 432 // Return value: string 433 // ---------------------------------------------------------------------- 434 LIBPROTOBUF_EXPORT string SimpleDtoa(double value); 435 LIBPROTOBUF_EXPORT string SimpleFtoa(float value); 436 437 LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); 438 LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); 439 440 // In practice, doubles should never need more than 24 bytes and floats 441 // should never need more than 14 (including null terminators), but we 442 // overestimate to be safe. 443 static const int kDoubleToBufferSize = 32; 444 static const int kFloatToBufferSize = 24; 445 446 // ---------------------------------------------------------------------- 447 // NoLocaleStrtod() 448 // Exactly like strtod(), except it always behaves as if in the "C" 449 // locale (i.e. decimal points must be '.'s). 450 // ---------------------------------------------------------------------- 451 452 LIBPROTOBUF_EXPORT double NoLocaleStrtod(const char* text, char** endptr); 453 454 } // namespace protobuf 455 } // namespace google 456 457 #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 458 459 460