Home | History | Annotate | Download | only in stubs
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // from google3/strings/strutil.cc
     32 
     33 #include <google/protobuf/stubs/strutil.h>
     34 #include <google/protobuf/stubs/mathlimits.h>
     35 
     36 #include <errno.h>
     37 #include <float.h>    // FLT_DIG and DBL_DIG
     38 #include <limits>
     39 #include <limits.h>
     40 #include <stdio.h>
     41 #include <iterator>
     42 
     43 #include <google/protobuf/stubs/stl_util.h>
     44 
     45 #ifdef _WIN32
     46 // MSVC has only _snprintf, not snprintf.
     47 //
     48 // MinGW has both snprintf and _snprintf, but they appear to be different
     49 // functions.  The former is buggy.  When invoked like so:
     50 //   char buffer[32];
     51 //   snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
     52 // it prints "1.23000e+10".  This is plainly wrong:  %g should never print
     53 // trailing zeros after the decimal point.  For some reason this bug only
     54 // occurs with some input values, not all.  In any case, _snprintf does the
     55 // right thing, so we use it.
     56 #define snprintf _snprintf
     57 #endif
     58 
     59 namespace google {
     60 namespace protobuf {
     61 
     62 // These are defined as macros on some platforms.  #undef them so that we can
     63 // redefine them.
     64 #undef isxdigit
     65 #undef isprint
     66 
     67 // The definitions of these in ctype.h change based on locale.  Since our
     68 // string manipulation is all in relation to the protocol buffer and C++
     69 // languages, we always want to use the C locale.  So, we re-define these
     70 // exactly as we want them.
     71 inline bool isxdigit(char c) {
     72   return ('0' <= c && c <= '9') ||
     73          ('a' <= c && c <= 'f') ||
     74          ('A' <= c && c <= 'F');
     75 }
     76 
     77 inline bool isprint(char c) {
     78   return c >= 0x20 && c <= 0x7E;
     79 }
     80 
     81 // ----------------------------------------------------------------------
     82 // StripString
     83 //    Replaces any occurrence of the character 'remove' (or the characters
     84 //    in 'remove') with the character 'replacewith'.
     85 // ----------------------------------------------------------------------
     86 void StripString(string* s, const char* remove, char replacewith) {
     87   const char * str_start = s->c_str();
     88   const char * str = str_start;
     89   for (str = strpbrk(str, remove);
     90        str != NULL;
     91        str = strpbrk(str + 1, remove)) {
     92     (*s)[str - str_start] = replacewith;
     93   }
     94 }
     95 
     96 void StripWhitespace(string* str) {
     97   int str_length = str->length();
     98 
     99   // Strip off leading whitespace.
    100   int first = 0;
    101   while (first < str_length && ascii_isspace(str->at(first))) {
    102     ++first;
    103   }
    104   // If entire string is white space.
    105   if (first == str_length) {
    106     str->clear();
    107     return;
    108   }
    109   if (first > 0) {
    110     str->erase(0, first);
    111     str_length -= first;
    112   }
    113 
    114   // Strip off trailing whitespace.
    115   int last = str_length - 1;
    116   while (last >= 0 && ascii_isspace(str->at(last))) {
    117     --last;
    118   }
    119   if (last != (str_length - 1) && last >= 0) {
    120     str->erase(last + 1, string::npos);
    121   }
    122 }
    123 
    124 // ----------------------------------------------------------------------
    125 // StringReplace()
    126 //    Replace the "old" pattern with the "new" pattern in a string,
    127 //    and append the result to "res".  If replace_all is false,
    128 //    it only replaces the first instance of "old."
    129 // ----------------------------------------------------------------------
    130 
    131 void StringReplace(const string& s, const string& oldsub,
    132                    const string& newsub, bool replace_all,
    133                    string* res) {
    134   if (oldsub.empty()) {
    135     res->append(s);  // if empty, append the given string.
    136     return;
    137   }
    138 
    139   string::size_type start_pos = 0;
    140   string::size_type pos;
    141   do {
    142     pos = s.find(oldsub, start_pos);
    143     if (pos == string::npos) {
    144       break;
    145     }
    146     res->append(s, start_pos, pos - start_pos);
    147     res->append(newsub);
    148     start_pos = pos + oldsub.size();  // start searching again after the "old"
    149   } while (replace_all);
    150   res->append(s, start_pos, s.length() - start_pos);
    151 }
    152 
    153 // ----------------------------------------------------------------------
    154 // StringReplace()
    155 //    Give me a string and two patterns "old" and "new", and I replace
    156 //    the first instance of "old" in the string with "new", if it
    157 //    exists.  If "global" is true; call this repeatedly until it
    158 //    fails.  RETURN a new string, regardless of whether the replacement
    159 //    happened or not.
    160 // ----------------------------------------------------------------------
    161 
    162 string StringReplace(const string& s, const string& oldsub,
    163                      const string& newsub, bool replace_all) {
    164   string ret;
    165   StringReplace(s, oldsub, newsub, replace_all, &ret);
    166   return ret;
    167 }
    168 
    169 // ----------------------------------------------------------------------
    170 // SplitStringUsing()
    171 //    Split a string using a character delimiter. Append the components
    172 //    to 'result'.
    173 //
    174 // Note: For multi-character delimiters, this routine will split on *ANY* of
    175 // the characters in the string, not the entire string as a single delimiter.
    176 // ----------------------------------------------------------------------
    177 template <typename ITR>
    178 static inline
    179 void SplitStringToIteratorUsing(const string& full,
    180                                 const char* delim,
    181                                 ITR& result) {
    182   // Optimize the common case where delim is a single character.
    183   if (delim[0] != '\0' && delim[1] == '\0') {
    184     char c = delim[0];
    185     const char* p = full.data();
    186     const char* end = p + full.size();
    187     while (p != end) {
    188       if (*p == c) {
    189         ++p;
    190       } else {
    191         const char* start = p;
    192         while (++p != end && *p != c);
    193         *result++ = string(start, p - start);
    194       }
    195     }
    196     return;
    197   }
    198 
    199   string::size_type begin_index, end_index;
    200   begin_index = full.find_first_not_of(delim);
    201   while (begin_index != string::npos) {
    202     end_index = full.find_first_of(delim, begin_index);
    203     if (end_index == string::npos) {
    204       *result++ = full.substr(begin_index);
    205       return;
    206     }
    207     *result++ = full.substr(begin_index, (end_index - begin_index));
    208     begin_index = full.find_first_not_of(delim, end_index);
    209   }
    210 }
    211 
    212 void SplitStringUsing(const string& full,
    213                       const char* delim,
    214                       vector<string>* result) {
    215   back_insert_iterator< vector<string> > it(*result);
    216   SplitStringToIteratorUsing(full, delim, it);
    217 }
    218 
    219 // Split a string using a character delimiter. Append the components
    220 // to 'result'.  If there are consecutive delimiters, this function
    221 // will return corresponding empty strings. The string is split into
    222 // at most the specified number of pieces greedily. This means that the
    223 // last piece may possibly be split further. To split into as many pieces
    224 // as possible, specify 0 as the number of pieces.
    225 //
    226 // If "full" is the empty string, yields an empty string as the only value.
    227 //
    228 // If "pieces" is negative for some reason, it returns the whole string
    229 // ----------------------------------------------------------------------
    230 template <typename StringType, typename ITR>
    231 static inline
    232 void SplitStringToIteratorAllowEmpty(const StringType& full,
    233                                      const char* delim,
    234                                      int pieces,
    235                                      ITR& result) {
    236   string::size_type begin_index, end_index;
    237   begin_index = 0;
    238 
    239   for (int i = 0; (i < pieces-1) || (pieces == 0); i++) {
    240     end_index = full.find_first_of(delim, begin_index);
    241     if (end_index == string::npos) {
    242       *result++ = full.substr(begin_index);
    243       return;
    244     }
    245     *result++ = full.substr(begin_index, (end_index - begin_index));
    246     begin_index = end_index + 1;
    247   }
    248   *result++ = full.substr(begin_index);
    249 }
    250 
    251 void SplitStringAllowEmpty(const string& full, const char* delim,
    252                            vector<string>* result) {
    253   back_insert_iterator<vector<string> > it(*result);
    254   SplitStringToIteratorAllowEmpty(full, delim, 0, it);
    255 }
    256 
    257 // ----------------------------------------------------------------------
    258 // JoinStrings()
    259 //    This merges a vector of string components with delim inserted
    260 //    as separaters between components.
    261 //
    262 // ----------------------------------------------------------------------
    263 template <class ITERATOR>
    264 static void JoinStringsIterator(const ITERATOR& start,
    265                                 const ITERATOR& end,
    266                                 const char* delim,
    267                                 string* result) {
    268   GOOGLE_CHECK(result != NULL);
    269   result->clear();
    270   int delim_length = strlen(delim);
    271 
    272   // Precompute resulting length so we can reserve() memory in one shot.
    273   int length = 0;
    274   for (ITERATOR iter = start; iter != end; ++iter) {
    275     if (iter != start) {
    276       length += delim_length;
    277     }
    278     length += iter->size();
    279   }
    280   result->reserve(length);
    281 
    282   // Now combine everything.
    283   for (ITERATOR iter = start; iter != end; ++iter) {
    284     if (iter != start) {
    285       result->append(delim, delim_length);
    286     }
    287     result->append(iter->data(), iter->size());
    288   }
    289 }
    290 
    291 void JoinStrings(const vector<string>& components,
    292                  const char* delim,
    293                  string * result) {
    294   JoinStringsIterator(components.begin(), components.end(), delim, result);
    295 }
    296 
    297 // ----------------------------------------------------------------------
    298 // UnescapeCEscapeSequences()
    299 //    This does all the unescaping that C does: \ooo, \r, \n, etc
    300 //    Returns length of resulting string.
    301 //    The implementation of \x parses any positive number of hex digits,
    302 //    but it is an error if the value requires more than 8 bits, and the
    303 //    result is truncated to 8 bits.
    304 //
    305 //    The second call stores its errors in a supplied string vector.
    306 //    If the string vector pointer is NULL, it reports the errors with LOG().
    307 // ----------------------------------------------------------------------
    308 
    309 #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
    310 
    311 // Protocol buffers doesn't ever care about errors, but I don't want to remove
    312 // the code.
    313 #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
    314 
    315 int UnescapeCEscapeSequences(const char* source, char* dest) {
    316   return UnescapeCEscapeSequences(source, dest, NULL);
    317 }
    318 
    319 int UnescapeCEscapeSequences(const char* source, char* dest,
    320                              vector<string> *errors) {
    321   GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
    322 
    323   char* d = dest;
    324   const char* p = source;
    325 
    326   // Small optimization for case where source = dest and there's no escaping
    327   while ( p == d && *p != '\0' && *p != '\\' )
    328     p++, d++;
    329 
    330   while (*p != '\0') {
    331     if (*p != '\\') {
    332       *d++ = *p++;
    333     } else {
    334       switch ( *++p ) {                    // skip past the '\\'
    335         case '\0':
    336           LOG_STRING(ERROR, errors) << "String cannot end with \\";
    337           *d = '\0';
    338           return d - dest;   // we're done with p
    339         case 'a':  *d++ = '\a';  break;
    340         case 'b':  *d++ = '\b';  break;
    341         case 'f':  *d++ = '\f';  break;
    342         case 'n':  *d++ = '\n';  break;
    343         case 'r':  *d++ = '\r';  break;
    344         case 't':  *d++ = '\t';  break;
    345         case 'v':  *d++ = '\v';  break;
    346         case '\\': *d++ = '\\';  break;
    347         case '?':  *d++ = '\?';  break;    // \?  Who knew?
    348         case '\'': *d++ = '\'';  break;
    349         case '"':  *d++ = '\"';  break;
    350         case '0': case '1': case '2': case '3':  // octal digit: 1 to 3 digits
    351         case '4': case '5': case '6': case '7': {
    352           char ch = *p - '0';
    353           if ( IS_OCTAL_DIGIT(p[1]) )
    354             ch = ch * 8 + *++p - '0';
    355           if ( IS_OCTAL_DIGIT(p[1]) )      // safe (and easy) to do this twice
    356             ch = ch * 8 + *++p - '0';      // now points at last digit
    357           *d++ = ch;
    358           break;
    359         }
    360         case 'x': case 'X': {
    361           if (!isxdigit(p[1])) {
    362             if (p[1] == '\0') {
    363               LOG_STRING(ERROR, errors) << "String cannot end with \\x";
    364             } else {
    365               LOG_STRING(ERROR, errors) <<
    366                 "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
    367             }
    368             break;
    369           }
    370           unsigned int ch = 0;
    371           const char *hex_start = p;
    372           while (isxdigit(p[1]))  // arbitrarily many hex digits
    373             ch = (ch << 4) + hex_digit_to_int(*++p);
    374           if (ch > 0xFF)
    375             LOG_STRING(ERROR, errors) << "Value of " <<
    376               "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
    377           *d++ = ch;
    378           break;
    379         }
    380 #if 0  // TODO(kenton):  Support \u and \U?  Requires runetochar().
    381         case 'u': {
    382           // \uhhhh => convert 4 hex digits to UTF-8
    383           char32 rune = 0;
    384           const char *hex_start = p;
    385           for (int i = 0; i < 4; ++i) {
    386             if (isxdigit(p[1])) {  // Look one char ahead.
    387               rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
    388             } else {
    389               LOG_STRING(ERROR, errors)
    390                 << "\\u must be followed by 4 hex digits: \\"
    391                 <<  string(hex_start, p+1-hex_start);
    392               break;
    393             }
    394           }
    395           d += runetochar(d, &rune);
    396           break;
    397         }
    398         case 'U': {
    399           // \Uhhhhhhhh => convert 8 hex digits to UTF-8
    400           char32 rune = 0;
    401           const char *hex_start = p;
    402           for (int i = 0; i < 8; ++i) {
    403             if (isxdigit(p[1])) {  // Look one char ahead.
    404               // Don't change rune until we're sure this
    405               // is within the Unicode limit, but do advance p.
    406               char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
    407               if (newrune > 0x10FFFF) {
    408                 LOG_STRING(ERROR, errors)
    409                   << "Value of \\"
    410                   << string(hex_start, p + 1 - hex_start)
    411                   << " exceeds Unicode limit (0x10FFFF)";
    412                 break;
    413               } else {
    414                 rune = newrune;
    415               }
    416             } else {
    417               LOG_STRING(ERROR, errors)
    418                 << "\\U must be followed by 8 hex digits: \\"
    419                 <<  string(hex_start, p+1-hex_start);
    420               break;
    421             }
    422           }
    423           d += runetochar(d, &rune);
    424           break;
    425         }
    426 #endif
    427         default:
    428           LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
    429       }
    430       p++;                                 // read past letter we escaped
    431     }
    432   }
    433   *d = '\0';
    434   return d - dest;
    435 }
    436 
    437 // ----------------------------------------------------------------------
    438 // UnescapeCEscapeString()
    439 //    This does the same thing as UnescapeCEscapeSequences, but creates
    440 //    a new string. The caller does not need to worry about allocating
    441 //    a dest buffer. This should be used for non performance critical
    442 //    tasks such as printing debug messages. It is safe for src and dest
    443 //    to be the same.
    444 //
    445 //    The second call stores its errors in a supplied string vector.
    446 //    If the string vector pointer is NULL, it reports the errors with LOG().
    447 //
    448 //    In the first and second calls, the length of dest is returned. In the
    449 //    the third call, the new string is returned.
    450 // ----------------------------------------------------------------------
    451 int UnescapeCEscapeString(const string& src, string* dest) {
    452   return UnescapeCEscapeString(src, dest, NULL);
    453 }
    454 
    455 int UnescapeCEscapeString(const string& src, string* dest,
    456                           vector<string> *errors) {
    457   scoped_array<char> unescaped(new char[src.size() + 1]);
    458   int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
    459   GOOGLE_CHECK(dest);
    460   dest->assign(unescaped.get(), len);
    461   return len;
    462 }
    463 
    464 string UnescapeCEscapeString(const string& src) {
    465   scoped_array<char> unescaped(new char[src.size() + 1]);
    466   int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
    467   return string(unescaped.get(), len);
    468 }
    469 
    470 // ----------------------------------------------------------------------
    471 // CEscapeString()
    472 // CHexEscapeString()
    473 //    Copies 'src' to 'dest', escaping dangerous characters using
    474 //    C-style escape sequences. This is very useful for preparing query
    475 //    flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
    476 //    hexadecimal rather than octal sequences.
    477 //    Returns the number of bytes written to 'dest' (not including the \0)
    478 //    or -1 if there was insufficient space.
    479 //
    480 //    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
    481 // ----------------------------------------------------------------------
    482 int CEscapeInternal(const char* src, int src_len, char* dest,
    483                     int dest_len, bool use_hex, bool utf8_safe) {
    484   const char* src_end = src + src_len;
    485   int used = 0;
    486   bool last_hex_escape = false; // true if last output char was \xNN
    487 
    488   for (; src < src_end; src++) {
    489     if (dest_len - used < 2)   // Need space for two letter escape
    490       return -1;
    491 
    492     bool is_hex_escape = false;
    493     switch (*src) {
    494       case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
    495       case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
    496       case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
    497       case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
    498       case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
    499       case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
    500       default:
    501         // Note that if we emit \xNN and the src character after that is a hex
    502         // digit then that digit must be escaped too to prevent it being
    503         // interpreted as part of the character code by C.
    504         if ((!utf8_safe || static_cast<uint8>(*src) < 0x80) &&
    505             (!isprint(*src) ||
    506              (last_hex_escape && isxdigit(*src)))) {
    507           if (dest_len - used < 4) // need space for 4 letter escape
    508             return -1;
    509           sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"),
    510                   static_cast<uint8>(*src));
    511           is_hex_escape = use_hex;
    512           used += 4;
    513         } else {
    514           dest[used++] = *src; break;
    515         }
    516     }
    517     last_hex_escape = is_hex_escape;
    518   }
    519 
    520   if (dest_len - used < 1)   // make sure that there is room for \0
    521     return -1;
    522 
    523   dest[used] = '\0';   // doesn't count towards return value though
    524   return used;
    525 }
    526 
    527 // Calculates the length of the C-style escaped version of 'src'.
    528 // Assumes that non-printable characters are escaped using octal sequences, and
    529 // that UTF-8 bytes are not handled specially.
    530 static inline size_t CEscapedLength(StringPiece src) {
    531   static char c_escaped_len[256] = {
    532     4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    533     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    534     1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    535     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    536     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    537     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    538     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    539     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    540     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    541     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    542     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    543     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    544     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    545     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    546     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    547     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    548   };
    549 
    550   size_t escaped_len = 0;
    551   for (int i = 0; i < src.size(); ++i) {
    552     unsigned char c = static_cast<unsigned char>(src[i]);
    553     escaped_len += c_escaped_len[c];
    554   }
    555   return escaped_len;
    556 }
    557 
    558 // ----------------------------------------------------------------------
    559 // Escapes 'src' using C-style escape sequences, and appends the escaped string
    560 // to 'dest'. This version is faster than calling CEscapeInternal as it computes
    561 // the required space using a lookup table, and also does not do any special
    562 // handling for Hex or UTF-8 characters.
    563 // ----------------------------------------------------------------------
    564 void CEscapeAndAppend(StringPiece src, string* dest) {
    565   size_t escaped_len = CEscapedLength(src);
    566   if (escaped_len == src.size()) {
    567     dest->append(src.data(), src.size());
    568     return;
    569   }
    570 
    571   size_t cur_dest_len = dest->size();
    572   dest->resize(cur_dest_len + escaped_len);
    573   char* append_ptr = &(*dest)[cur_dest_len];
    574 
    575   for (int i = 0; i < src.size(); ++i) {
    576     unsigned char c = static_cast<unsigned char>(src[i]);
    577     switch (c) {
    578       case '\n': *append_ptr++ = '\\'; *append_ptr++ = 'n'; break;
    579       case '\r': *append_ptr++ = '\\'; *append_ptr++ = 'r'; break;
    580       case '\t': *append_ptr++ = '\\'; *append_ptr++ = 't'; break;
    581       case '\"': *append_ptr++ = '\\'; *append_ptr++ = '\"'; break;
    582       case '\'': *append_ptr++ = '\\'; *append_ptr++ = '\''; break;
    583       case '\\': *append_ptr++ = '\\'; *append_ptr++ = '\\'; break;
    584       default:
    585         if (!isprint(c)) {
    586           *append_ptr++ = '\\';
    587           *append_ptr++ = '0' + c / 64;
    588           *append_ptr++ = '0' + (c % 64) / 8;
    589           *append_ptr++ = '0' + c % 8;
    590         } else {
    591           *append_ptr++ = c;
    592         }
    593         break;
    594     }
    595   }
    596 }
    597 
    598 string CEscape(const string& src) {
    599   string dest;
    600   CEscapeAndAppend(src, &dest);
    601   return dest;
    602 }
    603 
    604 namespace strings {
    605 
    606 string Utf8SafeCEscape(const string& src) {
    607   const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
    608   scoped_array<char> dest(new char[dest_length]);
    609   const int len = CEscapeInternal(src.data(), src.size(),
    610                                   dest.get(), dest_length, false, true);
    611   GOOGLE_DCHECK_GE(len, 0);
    612   return string(dest.get(), len);
    613 }
    614 
    615 string CHexEscape(const string& src) {
    616   const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
    617   scoped_array<char> dest(new char[dest_length]);
    618   const int len = CEscapeInternal(src.data(), src.size(),
    619                                   dest.get(), dest_length, true, false);
    620   GOOGLE_DCHECK_GE(len, 0);
    621   return string(dest.get(), len);
    622 }
    623 
    624 }  // namespace strings
    625 
    626 // ----------------------------------------------------------------------
    627 // strto32_adaptor()
    628 // strtou32_adaptor()
    629 //    Implementation of strto[u]l replacements that have identical
    630 //    overflow and underflow characteristics for both ILP-32 and LP-64
    631 //    platforms, including errno preservation in error-free calls.
    632 // ----------------------------------------------------------------------
    633 
    634 int32 strto32_adaptor(const char *nptr, char **endptr, int base) {
    635   const int saved_errno = errno;
    636   errno = 0;
    637   const long result = strtol(nptr, endptr, base);
    638   if (errno == ERANGE && result == LONG_MIN) {
    639     return kint32min;
    640   } else if (errno == ERANGE && result == LONG_MAX) {
    641     return kint32max;
    642   } else if (errno == 0 && result < kint32min) {
    643     errno = ERANGE;
    644     return kint32min;
    645   } else if (errno == 0 && result > kint32max) {
    646     errno = ERANGE;
    647     return kint32max;
    648   }
    649   if (errno == 0)
    650     errno = saved_errno;
    651   return static_cast<int32>(result);
    652 }
    653 
    654 uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {
    655   const int saved_errno = errno;
    656   errno = 0;
    657   const unsigned long result = strtoul(nptr, endptr, base);
    658   if (errno == ERANGE && result == ULONG_MAX) {
    659     return kuint32max;
    660   } else if (errno == 0 && result > kuint32max) {
    661     errno = ERANGE;
    662     return kuint32max;
    663   }
    664   if (errno == 0)
    665     errno = saved_errno;
    666   return static_cast<uint32>(result);
    667 }
    668 
    669 inline bool safe_parse_sign(string* text  /*inout*/,
    670                             bool* negative_ptr  /*output*/) {
    671   const char* start = text->data();
    672   const char* end = start + text->size();
    673 
    674   // Consume whitespace.
    675   while (start < end && (start[0] == ' ')) {
    676     ++start;
    677   }
    678   while (start < end && (end[-1] == ' ')) {
    679     --end;
    680   }
    681   if (start >= end) {
    682     return false;
    683   }
    684 
    685   // Consume sign.
    686   *negative_ptr = (start[0] == '-');
    687   if (*negative_ptr || start[0] == '+') {
    688     ++start;
    689     if (start >= end) {
    690       return false;
    691     }
    692   }
    693   *text = text->substr(start - text->data(), end - start);
    694   return true;
    695 }
    696 
    697 template<typename IntType>
    698 bool safe_parse_positive_int(
    699     string text, IntType* value_p) {
    700   int base = 10;
    701   IntType value = 0;
    702   const IntType vmax = std::numeric_limits<IntType>::max();
    703   assert(vmax > 0);
    704   assert(vmax >= base);
    705   const IntType vmax_over_base = vmax / base;
    706   const char* start = text.data();
    707   const char* end = start + text.size();
    708   // loop over digits
    709   for (; start < end; ++start) {
    710     unsigned char c = static_cast<unsigned char>(start[0]);
    711     int digit = c - '0';
    712     if (digit >= base || digit < 0) {
    713       *value_p = value;
    714       return false;
    715     }
    716     if (value > vmax_over_base) {
    717       *value_p = vmax;
    718       return false;
    719     }
    720     value *= base;
    721     if (value > vmax - digit) {
    722       *value_p = vmax;
    723       return false;
    724     }
    725     value += digit;
    726   }
    727   *value_p = value;
    728   return true;
    729 }
    730 
    731 template<typename IntType>
    732 bool safe_parse_negative_int(
    733     const string& text, IntType* value_p) {
    734   int base = 10;
    735   IntType value = 0;
    736   const IntType vmin = std::numeric_limits<IntType>::min();
    737   assert(vmin < 0);
    738   assert(vmin <= 0 - base);
    739   IntType vmin_over_base = vmin / base;
    740   // 2003 c++ standard [expr.mul]
    741   // "... the sign of the remainder is implementation-defined."
    742   // Although (vmin/base)*base + vmin%base is always vmin.
    743   // 2011 c++ standard tightens the spec but we cannot rely on it.
    744   if (vmin % base > 0) {
    745     vmin_over_base += 1;
    746   }
    747   const char* start = text.data();
    748   const char* end = start + text.size();
    749   // loop over digits
    750   for (; start < end; ++start) {
    751     unsigned char c = static_cast<unsigned char>(start[0]);
    752     int digit = c - '0';
    753     if (digit >= base || digit < 0) {
    754       *value_p = value;
    755       return false;
    756     }
    757     if (value < vmin_over_base) {
    758       *value_p = vmin;
    759       return false;
    760     }
    761     value *= base;
    762     if (value < vmin + digit) {
    763       *value_p = vmin;
    764       return false;
    765     }
    766     value -= digit;
    767   }
    768   *value_p = value;
    769   return true;
    770 }
    771 
    772 template<typename IntType>
    773 bool safe_int_internal(string text, IntType* value_p) {
    774   *value_p = 0;
    775   bool negative;
    776   if (!safe_parse_sign(&text, &negative)) {
    777     return false;
    778   }
    779   if (!negative) {
    780     return safe_parse_positive_int(text, value_p);
    781   } else {
    782     return safe_parse_negative_int(text, value_p);
    783   }
    784 }
    785 
    786 template<typename IntType>
    787 bool safe_uint_internal(string text, IntType* value_p) {
    788   *value_p = 0;
    789   bool negative;
    790   if (!safe_parse_sign(&text, &negative) || negative) {
    791     return false;
    792   }
    793   return safe_parse_positive_int(text, value_p);
    794 }
    795 
    796 // ----------------------------------------------------------------------
    797 // FastIntToBuffer()
    798 // FastInt64ToBuffer()
    799 // FastHexToBuffer()
    800 // FastHex64ToBuffer()
    801 // FastHex32ToBuffer()
    802 // ----------------------------------------------------------------------
    803 
    804 // Offset into buffer where FastInt64ToBuffer places the end of string
    805 // null character.  Also used by FastInt64ToBufferLeft.
    806 static const int kFastInt64ToBufferOffset = 21;
    807 
    808 char *FastInt64ToBuffer(int64 i, char* buffer) {
    809   // We could collapse the positive and negative sections, but that
    810   // would be slightly slower for positive numbers...
    811   // 22 bytes is enough to store -2**64, -18446744073709551616.
    812   char* p = buffer + kFastInt64ToBufferOffset;
    813   *p-- = '\0';
    814   if (i >= 0) {
    815     do {
    816       *p-- = '0' + i % 10;
    817       i /= 10;
    818     } while (i > 0);
    819     return p + 1;
    820   } else {
    821     // On different platforms, % and / have different behaviors for
    822     // negative numbers, so we need to jump through hoops to make sure
    823     // we don't divide negative numbers.
    824     if (i > -10) {
    825       i = -i;
    826       *p-- = '0' + i;
    827       *p = '-';
    828       return p;
    829     } else {
    830       // Make sure we aren't at MIN_INT, in which case we can't say i = -i
    831       i = i + 10;
    832       i = -i;
    833       *p-- = '0' + i % 10;
    834       // Undo what we did a moment ago
    835       i = i / 10 + 1;
    836       do {
    837         *p-- = '0' + i % 10;
    838         i /= 10;
    839       } while (i > 0);
    840       *p = '-';
    841       return p;
    842     }
    843   }
    844 }
    845 
    846 // Offset into buffer where FastInt32ToBuffer places the end of string
    847 // null character.  Also used by FastInt32ToBufferLeft
    848 static const int kFastInt32ToBufferOffset = 11;
    849 
    850 // Yes, this is a duplicate of FastInt64ToBuffer.  But, we need this for the
    851 // compiler to generate 32 bit arithmetic instructions.  It's much faster, at
    852 // least with 32 bit binaries.
    853 char *FastInt32ToBuffer(int32 i, char* buffer) {
    854   // We could collapse the positive and negative sections, but that
    855   // would be slightly slower for positive numbers...
    856   // 12 bytes is enough to store -2**32, -4294967296.
    857   char* p = buffer + kFastInt32ToBufferOffset;
    858   *p-- = '\0';
    859   if (i >= 0) {
    860     do {
    861       *p-- = '0' + i % 10;
    862       i /= 10;
    863     } while (i > 0);
    864     return p + 1;
    865   } else {
    866     // On different platforms, % and / have different behaviors for
    867     // negative numbers, so we need to jump through hoops to make sure
    868     // we don't divide negative numbers.
    869     if (i > -10) {
    870       i = -i;
    871       *p-- = '0' + i;
    872       *p = '-';
    873       return p;
    874     } else {
    875       // Make sure we aren't at MIN_INT, in which case we can't say i = -i
    876       i = i + 10;
    877       i = -i;
    878       *p-- = '0' + i % 10;
    879       // Undo what we did a moment ago
    880       i = i / 10 + 1;
    881       do {
    882         *p-- = '0' + i % 10;
    883         i /= 10;
    884       } while (i > 0);
    885       *p = '-';
    886       return p;
    887     }
    888   }
    889 }
    890 
    891 char *FastHexToBuffer(int i, char* buffer) {
    892   GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
    893 
    894   static const char *hexdigits = "0123456789abcdef";
    895   char *p = buffer + 21;
    896   *p-- = '\0';
    897   do {
    898     *p-- = hexdigits[i & 15];   // mod by 16
    899     i >>= 4;                    // divide by 16
    900   } while (i > 0);
    901   return p + 1;
    902 }
    903 
    904 char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
    905   static const char *hexdigits = "0123456789abcdef";
    906   buffer[num_byte] = '\0';
    907   for (int i = num_byte - 1; i >= 0; i--) {
    908 #ifdef _M_X64
    909     // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
    910     // block. Given that the uint32 cast was to improve performance on 32-bit
    911     // platforms, we use 64-bit '&' directly.
    912     buffer[i] = hexdigits[value & 0xf];
    913 #else
    914     buffer[i] = hexdigits[uint32(value) & 0xf];
    915 #endif
    916     value >>= 4;
    917   }
    918   return buffer;
    919 }
    920 
    921 char *FastHex64ToBuffer(uint64 value, char* buffer) {
    922   return InternalFastHexToBuffer(value, buffer, 16);
    923 }
    924 
    925 char *FastHex32ToBuffer(uint32 value, char* buffer) {
    926   return InternalFastHexToBuffer(value, buffer, 8);
    927 }
    928 
    929 // ----------------------------------------------------------------------
    930 // FastInt32ToBufferLeft()
    931 // FastUInt32ToBufferLeft()
    932 // FastInt64ToBufferLeft()
    933 // FastUInt64ToBufferLeft()
    934 //
    935 // Like the Fast*ToBuffer() functions above, these are intended for speed.
    936 // Unlike the Fast*ToBuffer() functions, however, these functions write
    937 // their output to the beginning of the buffer (hence the name, as the
    938 // output is left-aligned).  The caller is responsible for ensuring that
    939 // the buffer has enough space to hold the output.
    940 //
    941 // Returns a pointer to the end of the string (i.e. the null character
    942 // terminating the string).
    943 // ----------------------------------------------------------------------
    944 
    945 static const char two_ASCII_digits[100][2] = {
    946   {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
    947   {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
    948   {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
    949   {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
    950   {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
    951   {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
    952   {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
    953   {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
    954   {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
    955   {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
    956   {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
    957   {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
    958   {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
    959   {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
    960   {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
    961   {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
    962   {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
    963   {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
    964   {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
    965   {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
    966 };
    967 
    968 char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
    969   int digits;
    970   const char *ASCII_digits = NULL;
    971   // The idea of this implementation is to trim the number of divides to as few
    972   // as possible by using multiplication and subtraction rather than mod (%),
    973   // and by outputting two digits at a time rather than one.
    974   // The huge-number case is first, in the hopes that the compiler will output
    975   // that case in one branch-free block of code, and only output conditional
    976   // branches into it from below.
    977   if (u >= 1000000000) {  // >= 1,000,000,000
    978     digits = u / 100000000;  // 100,000,000
    979     ASCII_digits = two_ASCII_digits[digits];
    980     buffer[0] = ASCII_digits[0];
    981     buffer[1] = ASCII_digits[1];
    982     buffer += 2;
    983 sublt100_000_000:
    984     u -= digits * 100000000;  // 100,000,000
    985 lt100_000_000:
    986     digits = u / 1000000;  // 1,000,000
    987     ASCII_digits = two_ASCII_digits[digits];
    988     buffer[0] = ASCII_digits[0];
    989     buffer[1] = ASCII_digits[1];
    990     buffer += 2;
    991 sublt1_000_000:
    992     u -= digits * 1000000;  // 1,000,000
    993 lt1_000_000:
    994     digits = u / 10000;  // 10,000
    995     ASCII_digits = two_ASCII_digits[digits];
    996     buffer[0] = ASCII_digits[0];
    997     buffer[1] = ASCII_digits[1];
    998     buffer += 2;
    999 sublt10_000:
   1000     u -= digits * 10000;  // 10,000
   1001 lt10_000:
   1002     digits = u / 100;
   1003     ASCII_digits = two_ASCII_digits[digits];
   1004     buffer[0] = ASCII_digits[0];
   1005     buffer[1] = ASCII_digits[1];
   1006     buffer += 2;
   1007 sublt100:
   1008     u -= digits * 100;
   1009 lt100:
   1010     digits = u;
   1011     ASCII_digits = two_ASCII_digits[digits];
   1012     buffer[0] = ASCII_digits[0];
   1013     buffer[1] = ASCII_digits[1];
   1014     buffer += 2;
   1015 done:
   1016     *buffer = 0;
   1017     return buffer;
   1018   }
   1019 
   1020   if (u < 100) {
   1021     digits = u;
   1022     if (u >= 10) goto lt100;
   1023     *buffer++ = '0' + digits;
   1024     goto done;
   1025   }
   1026   if (u  <  10000) {   // 10,000
   1027     if (u >= 1000) goto lt10_000;
   1028     digits = u / 100;
   1029     *buffer++ = '0' + digits;
   1030     goto sublt100;
   1031   }
   1032   if (u  <  1000000) {   // 1,000,000
   1033     if (u >= 100000) goto lt1_000_000;
   1034     digits = u / 10000;  //    10,000
   1035     *buffer++ = '0' + digits;
   1036     goto sublt10_000;
   1037   }
   1038   if (u  <  100000000) {   // 100,000,000
   1039     if (u >= 10000000) goto lt100_000_000;
   1040     digits = u / 1000000;  //   1,000,000
   1041     *buffer++ = '0' + digits;
   1042     goto sublt1_000_000;
   1043   }
   1044   // we already know that u < 1,000,000,000
   1045   digits = u / 100000000;   // 100,000,000
   1046   *buffer++ = '0' + digits;
   1047   goto sublt100_000_000;
   1048 }
   1049 
   1050 char* FastInt32ToBufferLeft(int32 i, char* buffer) {
   1051   uint32 u = i;
   1052   if (i < 0) {
   1053     *buffer++ = '-';
   1054     u = -i;
   1055   }
   1056   return FastUInt32ToBufferLeft(u, buffer);
   1057 }
   1058 
   1059 char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
   1060   int digits;
   1061   const char *ASCII_digits = NULL;
   1062 
   1063   uint32 u = static_cast<uint32>(u64);
   1064   if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
   1065 
   1066   uint64 top_11_digits = u64 / 1000000000;
   1067   buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
   1068   u = u64 - (top_11_digits * 1000000000);
   1069 
   1070   digits = u / 10000000;  // 10,000,000
   1071   GOOGLE_DCHECK_LT(digits, 100);
   1072   ASCII_digits = two_ASCII_digits[digits];
   1073   buffer[0] = ASCII_digits[0];
   1074   buffer[1] = ASCII_digits[1];
   1075   buffer += 2;
   1076   u -= digits * 10000000;  // 10,000,000
   1077   digits = u / 100000;  // 100,000
   1078   ASCII_digits = two_ASCII_digits[digits];
   1079   buffer[0] = ASCII_digits[0];
   1080   buffer[1] = ASCII_digits[1];
   1081   buffer += 2;
   1082   u -= digits * 100000;  // 100,000
   1083   digits = u / 1000;  // 1,000
   1084   ASCII_digits = two_ASCII_digits[digits];
   1085   buffer[0] = ASCII_digits[0];
   1086   buffer[1] = ASCII_digits[1];
   1087   buffer += 2;
   1088   u -= digits * 1000;  // 1,000
   1089   digits = u / 10;
   1090   ASCII_digits = two_ASCII_digits[digits];
   1091   buffer[0] = ASCII_digits[0];
   1092   buffer[1] = ASCII_digits[1];
   1093   buffer += 2;
   1094   u -= digits * 10;
   1095   digits = u;
   1096   *buffer++ = '0' + digits;
   1097   *buffer = 0;
   1098   return buffer;
   1099 }
   1100 
   1101 char* FastInt64ToBufferLeft(int64 i, char* buffer) {
   1102   uint64 u = i;
   1103   if (i < 0) {
   1104     *buffer++ = '-';
   1105     u = -i;
   1106   }
   1107   return FastUInt64ToBufferLeft(u, buffer);
   1108 }
   1109 
   1110 // ----------------------------------------------------------------------
   1111 // SimpleItoa()
   1112 //    Description: converts an integer to a string.
   1113 //
   1114 //    Return value: string
   1115 // ----------------------------------------------------------------------
   1116 
   1117 string SimpleItoa(int i) {
   1118   char buffer[kFastToBufferSize];
   1119   return (sizeof(i) == 4) ?
   1120     FastInt32ToBuffer(i, buffer) :
   1121     FastInt64ToBuffer(i, buffer);
   1122 }
   1123 
   1124 string SimpleItoa(unsigned int i) {
   1125   char buffer[kFastToBufferSize];
   1126   return string(buffer, (sizeof(i) == 4) ?
   1127     FastUInt32ToBufferLeft(i, buffer) :
   1128     FastUInt64ToBufferLeft(i, buffer));
   1129 }
   1130 
   1131 string SimpleItoa(long i) {
   1132   char buffer[kFastToBufferSize];
   1133   return (sizeof(i) == 4) ?
   1134     FastInt32ToBuffer(i, buffer) :
   1135     FastInt64ToBuffer(i, buffer);
   1136 }
   1137 
   1138 string SimpleItoa(unsigned long i) {
   1139   char buffer[kFastToBufferSize];
   1140   return string(buffer, (sizeof(i) == 4) ?
   1141     FastUInt32ToBufferLeft(i, buffer) :
   1142     FastUInt64ToBufferLeft(i, buffer));
   1143 }
   1144 
   1145 string SimpleItoa(long long i) {
   1146   char buffer[kFastToBufferSize];
   1147   return (sizeof(i) == 4) ?
   1148     FastInt32ToBuffer(i, buffer) :
   1149     FastInt64ToBuffer(i, buffer);
   1150 }
   1151 
   1152 string SimpleItoa(unsigned long long i) {
   1153   char buffer[kFastToBufferSize];
   1154   return string(buffer, (sizeof(i) == 4) ?
   1155     FastUInt32ToBufferLeft(i, buffer) :
   1156     FastUInt64ToBufferLeft(i, buffer));
   1157 }
   1158 
   1159 // ----------------------------------------------------------------------
   1160 // SimpleDtoa()
   1161 // SimpleFtoa()
   1162 // DoubleToBuffer()
   1163 // FloatToBuffer()
   1164 //    We want to print the value without losing precision, but we also do
   1165 //    not want to print more digits than necessary.  This turns out to be
   1166 //    trickier than it sounds.  Numbers like 0.2 cannot be represented
   1167 //    exactly in binary.  If we print 0.2 with a very large precision,
   1168 //    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
   1169 //    On the other hand, if we set the precision too low, we lose
   1170 //    significant digits when printing numbers that actually need them.
   1171 //    It turns out there is no precision value that does the right thing
   1172 //    for all numbers.
   1173 //
   1174 //    Our strategy is to first try printing with a precision that is never
   1175 //    over-precise, then parse the result with strtod() to see if it
   1176 //    matches.  If not, we print again with a precision that will always
   1177 //    give a precise result, but may use more digits than necessary.
   1178 //
   1179 //    An arguably better strategy would be to use the algorithm described
   1180 //    in "How to Print Floating-Point Numbers Accurately" by Steele &
   1181 //    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
   1182 //    however, that the following implementation is about as fast as
   1183 //    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
   1184 //    will not scale well on multi-core machines.  DMG's code is slightly
   1185 //    more accurate (in that it will never use more digits than
   1186 //    necessary), but this is probably irrelevant for most users.
   1187 //
   1188 //    Rob Pike and Ken Thompson also have an implementation of dtoa() in
   1189 //    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
   1190 //    one in that it makes guesses and then uses strtod() to check them.
   1191 //    Their implementation is faster because they use their own code to
   1192 //    generate the digits in the first place rather than use snprintf(),
   1193 //    thus avoiding format string parsing overhead.  However, this makes
   1194 //    it considerably more complicated than the following implementation,
   1195 //    and it is embedded in a larger library.  If speed turns out to be
   1196 //    an issue, we could re-implement this in terms of their
   1197 //    implementation.
   1198 // ----------------------------------------------------------------------
   1199 
   1200 string SimpleDtoa(double value) {
   1201   char buffer[kDoubleToBufferSize];
   1202   return DoubleToBuffer(value, buffer);
   1203 }
   1204 
   1205 string SimpleFtoa(float value) {
   1206   char buffer[kFloatToBufferSize];
   1207   return FloatToBuffer(value, buffer);
   1208 }
   1209 
   1210 static inline bool IsValidFloatChar(char c) {
   1211   return ('0' <= c && c <= '9') ||
   1212          c == 'e' || c == 'E' ||
   1213          c == '+' || c == '-';
   1214 }
   1215 
   1216 void DelocalizeRadix(char* buffer) {
   1217   // Fast check:  if the buffer has a normal decimal point, assume no
   1218   // translation is needed.
   1219   if (strchr(buffer, '.') != NULL) return;
   1220 
   1221   // Find the first unknown character.
   1222   while (IsValidFloatChar(*buffer)) ++buffer;
   1223 
   1224   if (*buffer == '\0') {
   1225     // No radix character found.
   1226     return;
   1227   }
   1228 
   1229   // We are now pointing at the locale-specific radix character.  Replace it
   1230   // with '.'.
   1231   *buffer = '.';
   1232   ++buffer;
   1233 
   1234   if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
   1235     // It appears the radix was a multi-byte character.  We need to remove the
   1236     // extra bytes.
   1237     char* target = buffer;
   1238     do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
   1239     memmove(target, buffer, strlen(buffer) + 1);
   1240   }
   1241 }
   1242 
   1243 char* DoubleToBuffer(double value, char* buffer) {
   1244   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
   1245   // platforms these days.  Just in case some system exists where DBL_DIG
   1246   // is significantly larger -- and risks overflowing our buffer -- we have
   1247   // this assert.
   1248   GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
   1249 
   1250   if (value == numeric_limits<double>::infinity()) {
   1251     strcpy(buffer, "inf");
   1252     return buffer;
   1253   } else if (value == -numeric_limits<double>::infinity()) {
   1254     strcpy(buffer, "-inf");
   1255     return buffer;
   1256   } else if (MathLimits<double>::IsNaN(value)) {
   1257     strcpy(buffer, "nan");
   1258     return buffer;
   1259   }
   1260 
   1261   int snprintf_result =
   1262     snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
   1263 
   1264   // The snprintf should never overflow because the buffer is significantly
   1265   // larger than the precision we asked for.
   1266   GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
   1267 
   1268   // We need to make parsed_value volatile in order to force the compiler to
   1269   // write it out to the stack.  Otherwise, it may keep the value in a
   1270   // register, and if it does that, it may keep it as a long double instead
   1271   // of a double.  This long double may have extra bits that make it compare
   1272   // unequal to "value" even though it would be exactly equal if it were
   1273   // truncated to a double.
   1274   volatile double parsed_value = strtod(buffer, NULL);
   1275   if (parsed_value != value) {
   1276     int snprintf_result =
   1277       snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
   1278 
   1279     // Should never overflow; see above.
   1280     GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
   1281   }
   1282 
   1283   DelocalizeRadix(buffer);
   1284   return buffer;
   1285 }
   1286 
   1287 static int memcasecmp(const char *s1, const char *s2, size_t len) {
   1288   const unsigned char *us1 = reinterpret_cast<const unsigned char *>(s1);
   1289   const unsigned char *us2 = reinterpret_cast<const unsigned char *>(s2);
   1290 
   1291   for ( int i = 0; i < len; i++ ) {
   1292     const int diff =
   1293       static_cast<int>(static_cast<unsigned char>(ascii_tolower(us1[i]))) -
   1294       static_cast<int>(static_cast<unsigned char>(ascii_tolower(us2[i])));
   1295     if (diff != 0) return diff;
   1296   }
   1297   return 0;
   1298 }
   1299 
   1300 inline bool CaseEqual(StringPiece s1, StringPiece s2) {
   1301   if (s1.size() != s2.size()) return false;
   1302   return memcasecmp(s1.data(), s2.data(), s1.size()) == 0;
   1303 }
   1304 
   1305 bool safe_strtob(StringPiece str, bool* value) {
   1306   GOOGLE_CHECK(value != NULL) << "NULL output boolean given.";
   1307   if (CaseEqual(str, "true") || CaseEqual(str, "t") ||
   1308       CaseEqual(str, "yes") || CaseEqual(str, "y") ||
   1309       CaseEqual(str, "1")) {
   1310     *value = true;
   1311     return true;
   1312   }
   1313   if (CaseEqual(str, "false") || CaseEqual(str, "f") ||
   1314       CaseEqual(str, "no") || CaseEqual(str, "n") ||
   1315       CaseEqual(str, "0")) {
   1316     *value = false;
   1317     return true;
   1318   }
   1319   return false;
   1320 }
   1321 
   1322 bool safe_strtof(const char* str, float* value) {
   1323   char* endptr;
   1324   errno = 0;  // errno only gets set on errors
   1325 #if defined(_WIN32) || defined (__hpux)  // has no strtof()
   1326   *value = strtod(str, &endptr);
   1327 #else
   1328   *value = strtof(str, &endptr);
   1329 #endif
   1330   return *str != 0 && *endptr == 0 && errno == 0;
   1331 }
   1332 
   1333 bool safe_strtod(const char* str, double* value) {
   1334   char* endptr;
   1335   *value = strtod(str, &endptr);
   1336   if (endptr != str) {
   1337     while (ascii_isspace(*endptr)) ++endptr;
   1338   }
   1339   // Ignore range errors from strtod.  The values it
   1340   // returns on underflow and overflow are the right
   1341   // fallback in a robust setting.
   1342   return *str != '\0' && *endptr == '\0';
   1343 }
   1344 
   1345 bool safe_strto32(const string& str, int32* value) {
   1346   return safe_int_internal(str, value);
   1347 }
   1348 
   1349 bool safe_strtou32(const string& str, uint32* value) {
   1350   return safe_uint_internal(str, value);
   1351 }
   1352 
   1353 bool safe_strto64(const string& str, int64* value) {
   1354   return safe_int_internal(str, value);
   1355 }
   1356 
   1357 bool safe_strtou64(const string& str, uint64* value) {
   1358   return safe_uint_internal(str, value);
   1359 }
   1360 
   1361 char* FloatToBuffer(float value, char* buffer) {
   1362   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
   1363   // platforms these days.  Just in case some system exists where FLT_DIG
   1364   // is significantly larger -- and risks overflowing our buffer -- we have
   1365   // this assert.
   1366   GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
   1367 
   1368   if (value == numeric_limits<double>::infinity()) {
   1369     strcpy(buffer, "inf");
   1370     return buffer;
   1371   } else if (value == -numeric_limits<double>::infinity()) {
   1372     strcpy(buffer, "-inf");
   1373     return buffer;
   1374   } else if (MathLimits<float>::IsNaN(value)) {
   1375     strcpy(buffer, "nan");
   1376     return buffer;
   1377   }
   1378 
   1379   int snprintf_result =
   1380     snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
   1381 
   1382   // The snprintf should never overflow because the buffer is significantly
   1383   // larger than the precision we asked for.
   1384   GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
   1385 
   1386   float parsed_value;
   1387   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
   1388     int snprintf_result =
   1389       snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
   1390 
   1391     // Should never overflow; see above.
   1392     GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
   1393   }
   1394 
   1395   DelocalizeRadix(buffer);
   1396   return buffer;
   1397 }
   1398 
   1399 namespace strings {
   1400 
   1401 AlphaNum::AlphaNum(strings::Hex hex) {
   1402   char *const end = &digits[kFastToBufferSize];
   1403   char *writer = end;
   1404   uint64 value = hex.value;
   1405   uint64 width = hex.spec;
   1406   // We accomplish minimum width by OR'ing in 0x10000 to the user's value,
   1407   // where 0x10000 is the smallest hex number that is as wide as the user
   1408   // asked for.
   1409   uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;
   1410   static const char hexdigits[] = "0123456789abcdef";
   1411   do {
   1412     *--writer = hexdigits[value & 0xF];
   1413     value >>= 4;
   1414     mask >>= 4;
   1415   } while (mask != 0);
   1416   piece_data_ = writer;
   1417   piece_size_ = end - writer;
   1418 }
   1419 
   1420 }  // namespace strings
   1421 
   1422 // ----------------------------------------------------------------------
   1423 // StrCat()
   1424 //    This merges the given strings or integers, with no delimiter.  This
   1425 //    is designed to be the fastest possible way to construct a string out
   1426 //    of a mix of raw C strings, C++ strings, and integer values.
   1427 // ----------------------------------------------------------------------
   1428 
   1429 // Append is merely a version of memcpy that returns the address of the byte
   1430 // after the area just overwritten.  It comes in multiple flavors to minimize
   1431 // call overhead.
   1432 static char *Append1(char *out, const AlphaNum &x) {
   1433   memcpy(out, x.data(), x.size());
   1434   return out + x.size();
   1435 }
   1436 
   1437 static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {
   1438   memcpy(out, x1.data(), x1.size());
   1439   out += x1.size();
   1440 
   1441   memcpy(out, x2.data(), x2.size());
   1442   return out + x2.size();
   1443 }
   1444 
   1445 static char *Append4(char *out,
   1446                      const AlphaNum &x1, const AlphaNum &x2,
   1447                      const AlphaNum &x3, const AlphaNum &x4) {
   1448   memcpy(out, x1.data(), x1.size());
   1449   out += x1.size();
   1450 
   1451   memcpy(out, x2.data(), x2.size());
   1452   out += x2.size();
   1453 
   1454   memcpy(out, x3.data(), x3.size());
   1455   out += x3.size();
   1456 
   1457   memcpy(out, x4.data(), x4.size());
   1458   return out + x4.size();
   1459 }
   1460 
   1461 string StrCat(const AlphaNum &a, const AlphaNum &b) {
   1462   string result;
   1463   result.resize(a.size() + b.size());
   1464   char *const begin = &*result.begin();
   1465   char *out = Append2(begin, a, b);
   1466   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1467   return result;
   1468 }
   1469 
   1470 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
   1471   string result;
   1472   result.resize(a.size() + b.size() + c.size());
   1473   char *const begin = &*result.begin();
   1474   char *out = Append2(begin, a, b);
   1475   out = Append1(out, c);
   1476   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1477   return result;
   1478 }
   1479 
   1480 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1481               const AlphaNum &d) {
   1482   string result;
   1483   result.resize(a.size() + b.size() + c.size() + d.size());
   1484   char *const begin = &*result.begin();
   1485   char *out = Append4(begin, a, b, c, d);
   1486   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1487   return result;
   1488 }
   1489 
   1490 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1491               const AlphaNum &d, const AlphaNum &e) {
   1492   string result;
   1493   result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
   1494   char *const begin = &*result.begin();
   1495   char *out = Append4(begin, a, b, c, d);
   1496   out = Append1(out, e);
   1497   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1498   return result;
   1499 }
   1500 
   1501 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1502               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
   1503   string result;
   1504   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
   1505                 f.size());
   1506   char *const begin = &*result.begin();
   1507   char *out = Append4(begin, a, b, c, d);
   1508   out = Append2(out, e, f);
   1509   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1510   return result;
   1511 }
   1512 
   1513 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1514               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
   1515               const AlphaNum &g) {
   1516   string result;
   1517   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
   1518                 f.size() + g.size());
   1519   char *const begin = &*result.begin();
   1520   char *out = Append4(begin, a, b, c, d);
   1521   out = Append2(out, e, f);
   1522   out = Append1(out, g);
   1523   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1524   return result;
   1525 }
   1526 
   1527 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1528               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
   1529               const AlphaNum &g, const AlphaNum &h) {
   1530   string result;
   1531   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
   1532                 f.size() + g.size() + h.size());
   1533   char *const begin = &*result.begin();
   1534   char *out = Append4(begin, a, b, c, d);
   1535   out = Append4(out, e, f, g, h);
   1536   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1537   return result;
   1538 }
   1539 
   1540 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
   1541               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
   1542               const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
   1543   string result;
   1544   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
   1545                 f.size() + g.size() + h.size() + i.size());
   1546   char *const begin = &*result.begin();
   1547   char *out = Append4(begin, a, b, c, d);
   1548   out = Append4(out, e, f, g, h);
   1549   out = Append1(out, i);
   1550   GOOGLE_DCHECK_EQ(out, begin + result.size());
   1551   return result;
   1552 }
   1553 
   1554 // It's possible to call StrAppend with a char * pointer that is partway into
   1555 // the string we're appending to.  However the results of this are random.
   1556 // Therefore, check for this in debug mode.  Use unsigned math so we only have
   1557 // to do one comparison.
   1558 #define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
   1559     GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \
   1560                      uintptr_t((dest).size()))
   1561 
   1562 void StrAppend(string *result, const AlphaNum &a) {
   1563   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
   1564   result->append(a.data(), a.size());
   1565 }
   1566 
   1567 void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
   1568   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
   1569   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
   1570   string::size_type old_size = result->size();
   1571   result->resize(old_size + a.size() + b.size());
   1572   char *const begin = &*result->begin();
   1573   char *out = Append2(begin + old_size, a, b);
   1574   GOOGLE_DCHECK_EQ(out, begin + result->size());
   1575 }
   1576 
   1577 void StrAppend(string *result,
   1578                const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
   1579   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
   1580   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
   1581   GOOGLE_DCHECK_NO_OVERLAP(*result, c);
   1582   string::size_type old_size = result->size();
   1583   result->resize(old_size + a.size() + b.size() + c.size());
   1584   char *const begin = &*result->begin();
   1585   char *out = Append2(begin + old_size, a, b);
   1586   out = Append1(out, c);
   1587   GOOGLE_DCHECK_EQ(out, begin + result->size());
   1588 }
   1589 
   1590 void StrAppend(string *result,
   1591                const AlphaNum &a, const AlphaNum &b,
   1592                const AlphaNum &c, const AlphaNum &d) {
   1593   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
   1594   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
   1595   GOOGLE_DCHECK_NO_OVERLAP(*result, c);
   1596   GOOGLE_DCHECK_NO_OVERLAP(*result, d);
   1597   string::size_type old_size = result->size();
   1598   result->resize(old_size + a.size() + b.size() + c.size() + d.size());
   1599   char *const begin = &*result->begin();
   1600   char *out = Append4(begin + old_size, a, b, c, d);
   1601   GOOGLE_DCHECK_EQ(out, begin + result->size());
   1602 }
   1603 
   1604 int GlobalReplaceSubstring(const string& substring,
   1605                            const string& replacement,
   1606                            string* s) {
   1607   GOOGLE_CHECK(s != NULL);
   1608   if (s->empty() || substring.empty())
   1609     return 0;
   1610   string tmp;
   1611   int num_replacements = 0;
   1612   int pos = 0;
   1613   for (int match_pos = s->find(substring.data(), pos, substring.length());
   1614        match_pos != string::npos;
   1615        pos = match_pos + substring.length(),
   1616            match_pos = s->find(substring.data(), pos, substring.length())) {
   1617     ++num_replacements;
   1618     // Append the original content before the match.
   1619     tmp.append(*s, pos, match_pos - pos);
   1620     // Append the replacement for the match.
   1621     tmp.append(replacement.begin(), replacement.end());
   1622   }
   1623   // Append the content after the last match. If no replacements were made, the
   1624   // original string is left untouched.
   1625   if (num_replacements > 0) {
   1626     tmp.append(*s, pos, s->length() - pos);
   1627     s->swap(tmp);
   1628   }
   1629   return num_replacements;
   1630 }
   1631 
   1632 int CalculateBase64EscapedLen(int input_len, bool do_padding) {
   1633   // Base64 encodes three bytes of input at a time. If the input is not
   1634   // divisible by three, we pad as appropriate.
   1635   //
   1636   // (from http://tools.ietf.org/html/rfc3548)
   1637   // Special processing is performed if fewer than 24 bits are available
   1638   // at the end of the data being encoded.  A full encoding quantum is
   1639   // always completed at the end of a quantity.  When fewer than 24 input
   1640   // bits are available in an input group, zero bits are added (on the
   1641   // right) to form an integral number of 6-bit groups.  Padding at the
   1642   // end of the data is performed using the '=' character.  Since all base
   1643   // 64 input is an integral number of octets, only the following cases
   1644   // can arise:
   1645 
   1646 
   1647   // Base64 encodes each three bytes of input into four bytes of output.
   1648   int len = (input_len / 3) * 4;
   1649 
   1650   if (input_len % 3 == 0) {
   1651     // (from http://tools.ietf.org/html/rfc3548)
   1652     // (1) the final quantum of encoding input is an integral multiple of 24
   1653     // bits; here, the final unit of encoded output will be an integral
   1654     // multiple of 4 characters with no "=" padding,
   1655   } else if (input_len % 3 == 1) {
   1656     // (from http://tools.ietf.org/html/rfc3548)
   1657     // (2) the final quantum of encoding input is exactly 8 bits; here, the
   1658     // final unit of encoded output will be two characters followed by two
   1659     // "=" padding characters, or
   1660     len += 2;
   1661     if (do_padding) {
   1662       len += 2;
   1663     }
   1664   } else {  // (input_len % 3 == 2)
   1665     // (from http://tools.ietf.org/html/rfc3548)
   1666     // (3) the final quantum of encoding input is exactly 16 bits; here, the
   1667     // final unit of encoded output will be three characters followed by one
   1668     // "=" padding character.
   1669     len += 3;
   1670     if (do_padding) {
   1671       len += 1;
   1672     }
   1673   }
   1674 
   1675   assert(len >= input_len);  // make sure we didn't overflow
   1676   return len;
   1677 }
   1678 
   1679 // Base64Escape does padding, so this calculation includes padding.
   1680 int CalculateBase64EscapedLen(int input_len) {
   1681   return CalculateBase64EscapedLen(input_len, true);
   1682 }
   1683 
   1684 // ----------------------------------------------------------------------
   1685 // int Base64Unescape() - base64 decoder
   1686 // int Base64Escape() - base64 encoder
   1687 // int WebSafeBase64Unescape() - Google's variation of base64 decoder
   1688 // int WebSafeBase64Escape() - Google's variation of base64 encoder
   1689 //
   1690 // Check out
   1691 // http://tools.ietf.org/html/rfc2045 for formal description, but what we
   1692 // care about is that...
   1693 //   Take the encoded stuff in groups of 4 characters and turn each
   1694 //   character into a code 0 to 63 thus:
   1695 //           A-Z map to 0 to 25
   1696 //           a-z map to 26 to 51
   1697 //           0-9 map to 52 to 61
   1698 //           +(- for WebSafe) maps to 62
   1699 //           /(_ for WebSafe) maps to 63
   1700 //   There will be four numbers, all less than 64 which can be represented
   1701 //   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
   1702 //   Arrange the 6 digit binary numbers into three bytes as such:
   1703 //   aaaaaabb bbbbcccc ccdddddd
   1704 //   Equals signs (one or two) are used at the end of the encoded block to
   1705 //   indicate that the text was not an integer multiple of three bytes long.
   1706 // ----------------------------------------------------------------------
   1707 
   1708 int Base64UnescapeInternal(const char *src_param, int szsrc,
   1709                            char *dest, int szdest,
   1710                            const signed char* unbase64) {
   1711   static const char kPad64Equals = '=';
   1712   static const char kPad64Dot = '.';
   1713 
   1714   int decode = 0;
   1715   int destidx = 0;
   1716   int state = 0;
   1717   unsigned int ch = 0;
   1718   unsigned int temp = 0;
   1719 
   1720   // If "char" is signed by default, using *src as an array index results in
   1721   // accessing negative array elements. Treat the input as a pointer to
   1722   // unsigned char to avoid this.
   1723   const unsigned char *src = reinterpret_cast<const unsigned char*>(src_param);
   1724 
   1725   // The GET_INPUT macro gets the next input character, skipping
   1726   // over any whitespace, and stopping when we reach the end of the
   1727   // string or when we read any non-data character.  The arguments are
   1728   // an arbitrary identifier (used as a label for goto) and the number
   1729   // of data bytes that must remain in the input to avoid aborting the
   1730   // loop.
   1731 #define GET_INPUT(label, remain)                 \
   1732   label:                                         \
   1733     --szsrc;                                     \
   1734     ch = *src++;                                 \
   1735     decode = unbase64[ch];                       \
   1736     if (decode < 0) {                            \
   1737       if (ascii_isspace(ch) && szsrc >= remain)  \
   1738         goto label;                              \
   1739       state = 4 - remain;                        \
   1740       break;                                     \
   1741     }
   1742 
   1743   // if dest is null, we're just checking to see if it's legal input
   1744   // rather than producing output.  (I suspect this could just be done
   1745   // with a regexp...).  We duplicate the loop so this test can be
   1746   // outside it instead of in every iteration.
   1747 
   1748   if (dest) {
   1749     // This loop consumes 4 input bytes and produces 3 output bytes
   1750     // per iteration.  We can't know at the start that there is enough
   1751     // data left in the string for a full iteration, so the loop may
   1752     // break out in the middle; if so 'state' will be set to the
   1753     // number of input bytes read.
   1754 
   1755     while (szsrc >= 4)  {
   1756       // We'll start by optimistically assuming that the next four
   1757       // bytes of the string (src[0..3]) are four good data bytes
   1758       // (that is, no nulls, whitespace, padding chars, or illegal
   1759       // chars).  We need to test src[0..2] for nulls individually
   1760       // before constructing temp to preserve the property that we
   1761       // never read past a null in the string (no matter how long
   1762       // szsrc claims the string is).
   1763 
   1764       if (!src[0] || !src[1] || !src[2] ||
   1765           (temp = ((unsigned(unbase64[src[0]]) << 18) |
   1766                    (unsigned(unbase64[src[1]]) << 12) |
   1767                    (unsigned(unbase64[src[2]]) << 6) |
   1768                    (unsigned(unbase64[src[3]])))) & 0x80000000) {
   1769         // Iff any of those four characters was bad (null, illegal,
   1770         // whitespace, padding), then temp's high bit will be set
   1771         // (because unbase64[] is -1 for all bad characters).
   1772         //
   1773         // We'll back up and resort to the slower decoder, which knows
   1774         // how to handle those cases.
   1775 
   1776         GET_INPUT(first, 4);
   1777         temp = decode;
   1778         GET_INPUT(second, 3);
   1779         temp = (temp << 6) | decode;
   1780         GET_INPUT(third, 2);
   1781         temp = (temp << 6) | decode;
   1782         GET_INPUT(fourth, 1);
   1783         temp = (temp << 6) | decode;
   1784       } else {
   1785         // We really did have four good data bytes, so advance four
   1786         // characters in the string.
   1787 
   1788         szsrc -= 4;
   1789         src += 4;
   1790         decode = -1;
   1791         ch = '\0';
   1792       }
   1793 
   1794       // temp has 24 bits of input, so write that out as three bytes.
   1795 
   1796       if (destidx+3 > szdest) return -1;
   1797       dest[destidx+2] = temp;
   1798       temp >>= 8;
   1799       dest[destidx+1] = temp;
   1800       temp >>= 8;
   1801       dest[destidx] = temp;
   1802       destidx += 3;
   1803     }
   1804   } else {
   1805     while (szsrc >= 4)  {
   1806       if (!src[0] || !src[1] || !src[2] ||
   1807           (temp = ((unsigned(unbase64[src[0]]) << 18) |
   1808                    (unsigned(unbase64[src[1]]) << 12) |
   1809                    (unsigned(unbase64[src[2]]) << 6) |
   1810                    (unsigned(unbase64[src[3]])))) & 0x80000000) {
   1811         GET_INPUT(first_no_dest, 4);
   1812         GET_INPUT(second_no_dest, 3);
   1813         GET_INPUT(third_no_dest, 2);
   1814         GET_INPUT(fourth_no_dest, 1);
   1815       } else {
   1816         szsrc -= 4;
   1817         src += 4;
   1818         decode = -1;
   1819         ch = '\0';
   1820       }
   1821       destidx += 3;
   1822     }
   1823   }
   1824 
   1825 #undef GET_INPUT
   1826 
   1827   // if the loop terminated because we read a bad character, return
   1828   // now.
   1829   if (decode < 0 && ch != '\0' &&
   1830       ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))
   1831     return -1;
   1832 
   1833   if (ch == kPad64Equals || ch == kPad64Dot) {
   1834     // if we stopped by hitting an '=' or '.', un-read that character -- we'll
   1835     // look at it again when we count to check for the proper number of
   1836     // equals signs at the end.
   1837     ++szsrc;
   1838     --src;
   1839   } else {
   1840     // This loop consumes 1 input byte per iteration.  It's used to
   1841     // clean up the 0-3 input bytes remaining when the first, faster
   1842     // loop finishes.  'temp' contains the data from 'state' input
   1843     // characters read by the first loop.
   1844     while (szsrc > 0)  {
   1845       --szsrc;
   1846       ch = *src++;
   1847       decode = unbase64[ch];
   1848       if (decode < 0) {
   1849         if (ascii_isspace(ch)) {
   1850           continue;
   1851         } else if (ch == '\0') {
   1852           break;
   1853         } else if (ch == kPad64Equals || ch == kPad64Dot) {
   1854           // back up one character; we'll read it again when we check
   1855           // for the correct number of pad characters at the end.
   1856           ++szsrc;
   1857           --src;
   1858           break;
   1859         } else {
   1860           return -1;
   1861         }
   1862       }
   1863 
   1864       // Each input character gives us six bits of output.
   1865       temp = (temp << 6) | decode;
   1866       ++state;
   1867       if (state == 4) {
   1868         // If we've accumulated 24 bits of output, write that out as
   1869         // three bytes.
   1870         if (dest) {
   1871           if (destidx+3 > szdest) return -1;
   1872           dest[destidx+2] = temp;
   1873           temp >>= 8;
   1874           dest[destidx+1] = temp;
   1875           temp >>= 8;
   1876           dest[destidx] = temp;
   1877         }
   1878         destidx += 3;
   1879         state = 0;
   1880         temp = 0;
   1881       }
   1882     }
   1883   }
   1884 
   1885   // Process the leftover data contained in 'temp' at the end of the input.
   1886   int expected_equals = 0;
   1887   switch (state) {
   1888     case 0:
   1889       // Nothing left over; output is a multiple of 3 bytes.
   1890       break;
   1891 
   1892     case 1:
   1893       // Bad input; we have 6 bits left over.
   1894       return -1;
   1895 
   1896     case 2:
   1897       // Produce one more output byte from the 12 input bits we have left.
   1898       if (dest) {
   1899         if (destidx+1 > szdest) return -1;
   1900         temp >>= 4;
   1901         dest[destidx] = temp;
   1902       }
   1903       ++destidx;
   1904       expected_equals = 2;
   1905       break;
   1906 
   1907     case 3:
   1908       // Produce two more output bytes from the 18 input bits we have left.
   1909       if (dest) {
   1910         if (destidx+2 > szdest) return -1;
   1911         temp >>= 2;
   1912         dest[destidx+1] = temp;
   1913         temp >>= 8;
   1914         dest[destidx] = temp;
   1915       }
   1916       destidx += 2;
   1917       expected_equals = 1;
   1918       break;
   1919 
   1920     default:
   1921       // state should have no other values at this point.
   1922       GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
   1923   }
   1924 
   1925   // The remainder of the string should be all whitespace, mixed with
   1926   // exactly 0 equals signs, or exactly 'expected_equals' equals
   1927   // signs.  (Always accepting 0 equals signs is a google extension
   1928   // not covered in the RFC, as is accepting dot as the pad character.)
   1929 
   1930   int equals = 0;
   1931   while (szsrc > 0 && *src) {
   1932     if (*src == kPad64Equals || *src == kPad64Dot)
   1933       ++equals;
   1934     else if (!ascii_isspace(*src))
   1935       return -1;
   1936     --szsrc;
   1937     ++src;
   1938   }
   1939 
   1940   return (equals == 0 || equals == expected_equals) ? destidx : -1;
   1941 }
   1942 
   1943 // The arrays below were generated by the following code
   1944 // #include <sys/time.h>
   1945 // #include <stdlib.h>
   1946 // #include <string.h>
   1947 // main()
   1948 // {
   1949 //   static const char Base64[] =
   1950 //     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   1951 //   char *pos;
   1952 //   int idx, i, j;
   1953 //   printf("    ");
   1954 //   for (i = 0; i < 255; i += 8) {
   1955 //     for (j = i; j < i + 8; j++) {
   1956 //       pos = strchr(Base64, j);
   1957 //       if ((pos == NULL) || (j == 0))
   1958 //         idx = -1;
   1959 //       else
   1960 //         idx = pos - Base64;
   1961 //       if (idx == -1)
   1962 //         printf(" %2d,     ", idx);
   1963 //       else
   1964 //         printf(" %2d/*%c*/,", idx, j);
   1965 //     }
   1966 //     printf("\n    ");
   1967 //   }
   1968 // }
   1969 //
   1970 // where the value of "Base64[]" was replaced by one of the base-64 conversion
   1971 // tables from the functions below.
   1972 static const signed char kUnBase64[] = {
   1973   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1974   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1975   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1976   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1977   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1978   -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
   1979   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
   1980   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
   1981   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
   1982   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
   1983   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
   1984   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
   1985   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
   1986   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
   1987   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
   1988   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
   1989   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1990   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1991   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1992   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1993   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1994   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1995   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1996   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1997   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1998   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   1999   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2000   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2001   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2002   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2003   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2004   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
   2005 };
   2006 static const signed char kUnWebSafeBase64[] = {
   2007   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2008   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2009   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2010   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2011   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2012   -1,      -1,      -1,      -1,      -1,      62/*-*/, -1,      -1,
   2013   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
   2014   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
   2015   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
   2016   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
   2017   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
   2018   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      63/*_*/,
   2019   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
   2020   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
   2021   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
   2022   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
   2023   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2024   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2025   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2026   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2027   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2028   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2029   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2030   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2031   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2032   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2033   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2034   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2035   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2036   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2037   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   2038   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
   2039 };
   2040 
   2041 int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {
   2042   return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64);
   2043 }
   2044 
   2045 static bool Base64UnescapeInternal(const char* src, int slen, string* dest,
   2046                                    const signed char* unbase64) {
   2047   // Determine the size of the output string.  Base64 encodes every 3 bytes into
   2048   // 4 characters.  any leftover chars are added directly for good measure.
   2049   // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
   2050   const int dest_len = 3 * (slen / 4) + (slen % 4);
   2051 
   2052   dest->resize(dest_len);
   2053 
   2054   // We are getting the destination buffer by getting the beginning of the
   2055   // string and converting it into a char *.
   2056   const int len = Base64UnescapeInternal(src, slen, string_as_array(dest),
   2057                                          dest_len, unbase64);
   2058   if (len < 0) {
   2059     dest->clear();
   2060     return false;
   2061   }
   2062 
   2063   // could be shorter if there was padding
   2064   GOOGLE_DCHECK_LE(len, dest_len);
   2065   dest->erase(len);
   2066 
   2067   return true;
   2068 }
   2069 
   2070 bool Base64Unescape(StringPiece src, string* dest) {
   2071   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
   2072 }
   2073 
   2074 bool WebSafeBase64Unescape(StringPiece src, string* dest) {
   2075   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
   2076 }
   2077 
   2078 int Base64EscapeInternal(const unsigned char *src, int szsrc,
   2079                          char *dest, int szdest, const char *base64,
   2080                          bool do_padding) {
   2081   static const char kPad64 = '=';
   2082 
   2083   if (szsrc <= 0) return 0;
   2084 
   2085   if (szsrc * 4 > szdest * 3) return 0;
   2086 
   2087   char *cur_dest = dest;
   2088   const unsigned char *cur_src = src;
   2089 
   2090   char *limit_dest = dest + szdest;
   2091   const unsigned char *limit_src = src + szsrc;
   2092 
   2093   // Three bytes of data encodes to four characters of cyphertext.
   2094   // So we can pump through three-byte chunks atomically.
   2095   while (cur_src < limit_src - 3) {  // keep going as long as we have >= 32 bits
   2096     uint32 in = BigEndian::Load32(cur_src) >> 8;
   2097 
   2098     cur_dest[0] = base64[in >> 18];
   2099     in &= 0x3FFFF;
   2100     cur_dest[1] = base64[in >> 12];
   2101     in &= 0xFFF;
   2102     cur_dest[2] = base64[in >> 6];
   2103     in &= 0x3F;
   2104     cur_dest[3] = base64[in];
   2105 
   2106     cur_dest += 4;
   2107     cur_src += 3;
   2108   }
   2109   // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
   2110   szdest = limit_dest - cur_dest;
   2111   szsrc = limit_src - cur_src;
   2112 
   2113   /* now deal with the tail (<=3 bytes) */
   2114   switch (szsrc) {
   2115     case 0:
   2116       // Nothing left; nothing more to do.
   2117       break;
   2118     case 1: {
   2119       // One byte left: this encodes to two characters, and (optionally)
   2120       // two pad characters to round out the four-character cypherblock.
   2121       if ((szdest -= 2) < 0) return 0;
   2122       uint32 in = cur_src[0];
   2123       cur_dest[0] = base64[in >> 2];
   2124       in &= 0x3;
   2125       cur_dest[1] = base64[in << 4];
   2126       cur_dest += 2;
   2127       if (do_padding) {
   2128         if ((szdest -= 2) < 0) return 0;
   2129         cur_dest[0] = kPad64;
   2130         cur_dest[1] = kPad64;
   2131         cur_dest += 2;
   2132       }
   2133       break;
   2134     }
   2135     case 2: {
   2136       // Two bytes left: this encodes to three characters, and (optionally)
   2137       // one pad character to round out the four-character cypherblock.
   2138       if ((szdest -= 3) < 0) return 0;
   2139       uint32 in = BigEndian::Load16(cur_src);
   2140       cur_dest[0] = base64[in >> 10];
   2141       in &= 0x3FF;
   2142       cur_dest[1] = base64[in >> 4];
   2143       in &= 0x00F;
   2144       cur_dest[2] = base64[in << 2];
   2145       cur_dest += 3;
   2146       if (do_padding) {
   2147         if ((szdest -= 1) < 0) return 0;
   2148         cur_dest[0] = kPad64;
   2149         cur_dest += 1;
   2150       }
   2151       break;
   2152     }
   2153     case 3: {
   2154       // Three bytes left: same as in the big loop above.  We can't do this in
   2155       // the loop because the loop above always reads 4 bytes, and the fourth
   2156       // byte is past the end of the input.
   2157       if ((szdest -= 4) < 0) return 0;
   2158       uint32 in = (cur_src[0] << 16) + BigEndian::Load16(cur_src + 1);
   2159       cur_dest[0] = base64[in >> 18];
   2160       in &= 0x3FFFF;
   2161       cur_dest[1] = base64[in >> 12];
   2162       in &= 0xFFF;
   2163       cur_dest[2] = base64[in >> 6];
   2164       in &= 0x3F;
   2165       cur_dest[3] = base64[in];
   2166       cur_dest += 4;
   2167       break;
   2168     }
   2169     default:
   2170       // Should not be reached: blocks of 4 bytes are handled
   2171       // in the while loop before this switch statement.
   2172       GOOGLE_LOG(FATAL) << "Logic problem? szsrc = " << szsrc;
   2173       break;
   2174   }
   2175   return (cur_dest - dest);
   2176 }
   2177 
   2178 static const char kBase64Chars[] =
   2179 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   2180 
   2181 static const char kWebSafeBase64Chars[] =
   2182 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
   2183 
   2184 int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest) {
   2185   return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
   2186 }
   2187 int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest,
   2188                         int szdest, bool do_padding) {
   2189   return Base64EscapeInternal(src, szsrc, dest, szdest,
   2190                               kWebSafeBase64Chars, do_padding);
   2191 }
   2192 
   2193 void Base64EscapeInternal(const unsigned char* src, int szsrc,
   2194                           string* dest, bool do_padding,
   2195                           const char* base64_chars) {
   2196   const int calc_escaped_size =
   2197     CalculateBase64EscapedLen(szsrc, do_padding);
   2198   dest->resize(calc_escaped_size);
   2199   const int escaped_len = Base64EscapeInternal(src, szsrc,
   2200                                                string_as_array(dest),
   2201                                                dest->size(),
   2202                                                base64_chars,
   2203                                                do_padding);
   2204   GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);
   2205   dest->erase(escaped_len);
   2206 }
   2207 
   2208 void Base64Escape(const unsigned char *src, int szsrc,
   2209                   string* dest, bool do_padding) {
   2210   Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
   2211 }
   2212 
   2213 void WebSafeBase64Escape(const unsigned char *src, int szsrc,
   2214                          string *dest, bool do_padding) {
   2215   Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars);
   2216 }
   2217 
   2218 void Base64Escape(StringPiece src, string* dest) {
   2219   Base64Escape(reinterpret_cast<const unsigned char*>(src.data()),
   2220                src.size(), dest, true);
   2221 }
   2222 
   2223 void WebSafeBase64Escape(StringPiece src, string* dest) {
   2224   WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
   2225                       src.size(), dest, false);
   2226 }
   2227 
   2228 void WebSafeBase64EscapeWithPadding(StringPiece src, string* dest) {
   2229   WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
   2230                       src.size(), dest, true);
   2231 }
   2232 
   2233 // Helper to append a Unicode code point to a string as UTF8, without bringing
   2234 // in any external dependencies.
   2235 int EncodeAsUTF8Char(uint32 code_point, char* output) {
   2236   uint32 tmp = 0;
   2237   int len = 0;
   2238   if (code_point <= 0x7f) {
   2239     tmp = code_point;
   2240     len = 1;
   2241   } else if (code_point <= 0x07ff) {
   2242     tmp = 0x0000c080 |
   2243         ((code_point & 0x07c0) << 2) |
   2244         (code_point & 0x003f);
   2245     len = 2;
   2246   } else if (code_point <= 0xffff) {
   2247     tmp = 0x00e08080 |
   2248         ((code_point & 0xf000) << 4) |
   2249         ((code_point & 0x0fc0) << 2) |
   2250         (code_point & 0x003f);
   2251     len = 3;
   2252   } else {
   2253     // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
   2254     // normally only defined up to there as well.
   2255     tmp = 0xf0808080 |
   2256         ((code_point & 0x1c0000) << 6) |
   2257         ((code_point & 0x03f000) << 4) |
   2258         ((code_point & 0x000fc0) << 2) |
   2259         (code_point & 0x003f);
   2260     len = 4;
   2261   }
   2262   tmp = ghtonl(tmp);
   2263   memcpy(output, reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
   2264   return len;
   2265 }
   2266 
   2267 // Table of UTF-8 character lengths, based on first byte
   2268 static const unsigned char kUTF8LenTbl[256] = {
   2269   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2270   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2271   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2272   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2273 
   2274   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2275   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
   2276   2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
   2277   3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
   2278 };
   2279 
   2280 // Return length of a single UTF-8 source character
   2281 int UTF8FirstLetterNumBytes(const char* src, int len) {
   2282   if (len == 0) {
   2283     return 0;
   2284   }
   2285   return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
   2286 }
   2287 
   2288 }  // namespace protobuf
   2289 }  // namespace google
   2290