Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/string_util.h"
      6 
      7 #include "build/build_config.h"
      8 
      9 #include <ctype.h>
     10 #include <errno.h>
     11 #include <math.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <string.h>
     16 #include <time.h>
     17 #include <wchar.h>
     18 #include <wctype.h>
     19 
     20 #include <algorithm>
     21 #include <vector>
     22 
     23 #include "base/basictypes.h"
     24 #include "base/logging.h"
     25 #include "base/singleton.h"
     26 #include "base/third_party/dmg_fp/dmg_fp.h"
     27 
     28 namespace {
     29 
     30 // Force the singleton used by Empty[W]String[16] to be a unique type. This
     31 // prevents other code that might accidentally use Singleton<string> from
     32 // getting our internal one.
     33 struct EmptyStrings {
     34   EmptyStrings() {}
     35   const std::string s;
     36   const std::wstring ws;
     37   const string16 s16;
     38 };
     39 
     40 // Used by ReplaceStringPlaceholders to track the position in the string of
     41 // replaced parameters.
     42 struct ReplacementOffset {
     43   ReplacementOffset(uintptr_t parameter, size_t offset)
     44       : parameter(parameter),
     45         offset(offset) {}
     46 
     47   // Index of the parameter.
     48   uintptr_t parameter;
     49 
     50   // Starting position in the string.
     51   size_t offset;
     52 };
     53 
     54 static bool CompareParameter(const ReplacementOffset& elem1,
     55                              const ReplacementOffset& elem2) {
     56   return elem1.parameter < elem2.parameter;
     57 }
     58 
     59 // Generalized string-to-number conversion.
     60 //
     61 // StringToNumberTraits should provide:
     62 //  - a typedef for string_type, the STL string type used as input.
     63 //  - a typedef for value_type, the target numeric type.
     64 //  - a static function, convert_func, which dispatches to an appropriate
     65 //    strtol-like function and returns type value_type.
     66 //  - a static function, valid_func, which validates |input| and returns a bool
     67 //    indicating whether it is in proper form.  This is used to check for
     68 //    conditions that convert_func tolerates but should result in
     69 //    StringToNumber returning false.  For strtol-like funtions, valid_func
     70 //    should check for leading whitespace.
     71 template<typename StringToNumberTraits>
     72 bool StringToNumber(const typename StringToNumberTraits::string_type& input,
     73                     typename StringToNumberTraits::value_type* output) {
     74   typedef StringToNumberTraits traits;
     75 
     76   errno = 0;  // Thread-safe?  It is on at least Mac, Linux, and Windows.
     77   typename traits::string_type::value_type* endptr = NULL;
     78   typename traits::value_type value = traits::convert_func(input.c_str(),
     79                                                            &endptr);
     80   *output = value;
     81 
     82   // Cases to return false:
     83   //  - If errno is ERANGE, there was an overflow or underflow.
     84   //  - If the input string is empty, there was nothing to parse.
     85   //  - If endptr does not point to the end of the string, there are either
     86   //    characters remaining in the string after a parsed number, or the string
     87   //    does not begin with a parseable number.  endptr is compared to the
     88   //    expected end given the string's stated length to correctly catch cases
     89   //    where the string contains embedded NUL characters.
     90   //  - valid_func determines that the input is not in preferred form.
     91   return errno == 0 &&
     92          !input.empty() &&
     93          input.c_str() + input.length() == endptr &&
     94          traits::valid_func(input);
     95 }
     96 
     97 static int strtoi(const char *nptr, char **endptr, int base) {
     98   long res = strtol(nptr, endptr, base);
     99 #if __LP64__
    100   // Long is 64-bits, we have to handle under/overflow ourselves.
    101   if (res > kint32max) {
    102     res = kint32max;
    103     errno = ERANGE;
    104   } else if (res < kint32min) {
    105     res = kint32min;
    106     errno = ERANGE;
    107   }
    108 #endif
    109   return static_cast<int>(res);
    110 }
    111 
    112 static unsigned int strtoui(const char *nptr, char **endptr, int base) {
    113   unsigned long res = strtoul(nptr, endptr, base);
    114 #if __LP64__
    115   // Long is 64-bits, we have to handle under/overflow ourselves.  Test to see
    116   // if the result can fit into 32-bits (as signed or unsigned).
    117   if (static_cast<int>(static_cast<long>(res)) != static_cast<long>(res) &&
    118       static_cast<unsigned int>(res) != res) {
    119     res = kuint32max;
    120     errno = ERANGE;
    121   }
    122 #endif
    123   return static_cast<unsigned int>(res);
    124 }
    125 
    126 class StringToIntTraits {
    127  public:
    128   typedef std::string string_type;
    129   typedef int value_type;
    130   static const int kBase = 10;
    131   static inline value_type convert_func(const string_type::value_type* str,
    132                                         string_type::value_type** endptr) {
    133     return strtoi(str, endptr, kBase);
    134   }
    135   static inline bool valid_func(const string_type& str) {
    136     return !str.empty() && !isspace(str[0]);
    137   }
    138 };
    139 
    140 class String16ToIntTraits {
    141  public:
    142   typedef string16 string_type;
    143   typedef int value_type;
    144   static const int kBase = 10;
    145   static inline value_type convert_func(const string_type::value_type* str,
    146                                         string_type::value_type** endptr) {
    147 #if defined(WCHAR_T_IS_UTF16)
    148     return wcstol(str, endptr, kBase);
    149 #elif defined(WCHAR_T_IS_UTF32)
    150     std::string ascii_string = UTF16ToASCII(string16(str));
    151     char* ascii_end = NULL;
    152     value_type ret = strtoi(ascii_string.c_str(), &ascii_end, kBase);
    153     if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
    154       *endptr =
    155           const_cast<string_type::value_type*>(str) + ascii_string.length();
    156     }
    157     return ret;
    158 #endif
    159   }
    160   static inline bool valid_func(const string_type& str) {
    161     return !str.empty() && !iswspace(str[0]);
    162   }
    163 };
    164 
    165 class StringToInt64Traits {
    166  public:
    167   typedef std::string string_type;
    168   typedef int64 value_type;
    169   static const int kBase = 10;
    170   static inline value_type convert_func(const string_type::value_type* str,
    171                                         string_type::value_type** endptr) {
    172 #ifdef OS_WIN
    173     return _strtoi64(str, endptr, kBase);
    174 #else  // assume OS_POSIX
    175     return strtoll(str, endptr, kBase);
    176 #endif
    177   }
    178   static inline bool valid_func(const string_type& str) {
    179     return !str.empty() && !isspace(str[0]);
    180   }
    181 };
    182 
    183 class String16ToInt64Traits {
    184  public:
    185   typedef string16 string_type;
    186   typedef int64 value_type;
    187   static const int kBase = 10;
    188   static inline value_type convert_func(const string_type::value_type* str,
    189                                         string_type::value_type** endptr) {
    190 #ifdef OS_WIN
    191     return _wcstoi64(str, endptr, kBase);
    192 #else  // assume OS_POSIX
    193     std::string ascii_string = UTF16ToASCII(string16(str));
    194     char* ascii_end = NULL;
    195     value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase);
    196     if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
    197       *endptr =
    198           const_cast<string_type::value_type*>(str) + ascii_string.length();
    199     }
    200     return ret;
    201 #endif
    202   }
    203   static inline bool valid_func(const string_type& str) {
    204     return !str.empty() && !iswspace(str[0]);
    205   }
    206 };
    207 
    208 // For the HexString variants, use the unsigned variants like strtoul for
    209 // convert_func so that input like "0x80000000" doesn't result in an overflow.
    210 
    211 class HexStringToIntTraits {
    212  public:
    213   typedef std::string string_type;
    214   typedef int value_type;
    215   static const int kBase = 16;
    216   static inline value_type convert_func(const string_type::value_type* str,
    217                                         string_type::value_type** endptr) {
    218     return strtoui(str, endptr, kBase);
    219   }
    220   static inline bool valid_func(const string_type& str) {
    221     return !str.empty() && !isspace(str[0]);
    222   }
    223 };
    224 
    225 class HexString16ToIntTraits {
    226  public:
    227   typedef string16 string_type;
    228   typedef int value_type;
    229   static const int kBase = 16;
    230   static inline value_type convert_func(const string_type::value_type* str,
    231                                         string_type::value_type** endptr) {
    232 #if defined(WCHAR_T_IS_UTF16)
    233     return wcstoul(str, endptr, kBase);
    234 #elif defined(WCHAR_T_IS_UTF32)
    235     std::string ascii_string = UTF16ToASCII(string16(str));
    236     char* ascii_end = NULL;
    237     value_type ret = strtoui(ascii_string.c_str(), &ascii_end, kBase);
    238     if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
    239       *endptr =
    240           const_cast<string_type::value_type*>(str) + ascii_string.length();
    241     }
    242     return ret;
    243 #endif
    244   }
    245   static inline bool valid_func(const string_type& str) {
    246     return !str.empty() && !iswspace(str[0]);
    247   }
    248 };
    249 
    250 class StringToDoubleTraits {
    251  public:
    252   typedef std::string string_type;
    253   typedef double value_type;
    254   static inline value_type convert_func(const string_type::value_type* str,
    255                                         string_type::value_type** endptr) {
    256     return dmg_fp::strtod(str, endptr);
    257   }
    258   static inline bool valid_func(const string_type& str) {
    259     return !str.empty() && !isspace(str[0]);
    260   }
    261 };
    262 
    263 class String16ToDoubleTraits {
    264  public:
    265   typedef string16 string_type;
    266   typedef double value_type;
    267   static inline value_type convert_func(const string_type::value_type* str,
    268                                         string_type::value_type** endptr) {
    269     // Because dmg_fp::strtod does not like char16, we convert it to ASCII.
    270     // In theory, this should be safe, but it's possible that 16-bit chars
    271     // might get ignored by accident causing something to be parsed when it
    272     // shouldn't.
    273     std::string ascii_string = UTF16ToASCII(string16(str));
    274     char* ascii_end = NULL;
    275     value_type ret = dmg_fp::strtod(ascii_string.c_str(), &ascii_end);
    276     if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
    277       // Put endptr at end of input string, so it's not recognized as an error.
    278       *endptr =
    279           const_cast<string_type::value_type*>(str) + ascii_string.length();
    280     }
    281 
    282     return ret;
    283   }
    284   static inline bool valid_func(const string_type& str) {
    285     return !str.empty() && !iswspace(str[0]);
    286   }
    287 };
    288 
    289 }  // namespace
    290 
    291 
    292 namespace base {
    293 
    294 bool IsWprintfFormatPortable(const wchar_t* format) {
    295   for (const wchar_t* position = format; *position != '\0'; ++position) {
    296     if (*position == '%') {
    297       bool in_specification = true;
    298       bool modifier_l = false;
    299       while (in_specification) {
    300         // Eat up characters until reaching a known specifier.
    301         if (*++position == '\0') {
    302           // The format string ended in the middle of a specification.  Call
    303           // it portable because no unportable specifications were found.  The
    304           // string is equally broken on all platforms.
    305           return true;
    306         }
    307 
    308         if (*position == 'l') {
    309           // 'l' is the only thing that can save the 's' and 'c' specifiers.
    310           modifier_l = true;
    311         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
    312                    *position == 'S' || *position == 'C' || *position == 'F' ||
    313                    *position == 'D' || *position == 'O' || *position == 'U') {
    314           // Not portable.
    315           return false;
    316         }
    317 
    318         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
    319           // Portable, keep scanning the rest of the format string.
    320           in_specification = false;
    321         }
    322       }
    323     }
    324   }
    325 
    326   return true;
    327 }
    328 
    329 
    330 }  // namespace base
    331 
    332 
    333 const std::string& EmptyString() {
    334   return Singleton<EmptyStrings>::get()->s;
    335 }
    336 
    337 const std::wstring& EmptyWString() {
    338   return Singleton<EmptyStrings>::get()->ws;
    339 }
    340 
    341 const string16& EmptyString16() {
    342   return Singleton<EmptyStrings>::get()->s16;
    343 }
    344 
// Comma-separated list of the code points treated as whitespace, ending with
// a 0 terminator.  It is expanded as the initializer of both kWhitespaceWide
// and kWhitespaceUTF16 so the two tables cannot drift apart.
#define WHITESPACE_UNICODE \
  0x0009, /* <control-0009> to <control-000D> */ \
  0x000A,                                        \
  0x000B,                                        \
  0x000C,                                        \
  0x000D,                                        \
  0x0020, /* Space */                            \
  0x0085, /* <control-0085> */                   \
  0x00A0, /* No-Break Space */                   \
  0x1680, /* Ogham Space Mark */                 \
  0x180E, /* Mongolian Vowel Separator */        \
  0x2000, /* En Quad to Hair Space */            \
  0x2001,                                        \
  0x2002,                                        \
  0x2003,                                        \
  0x2004,                                        \
  0x2005,                                        \
  0x2006,                                        \
  0x2007,                                        \
  0x2008,                                        \
  0x2009,                                        \
  0x200A,                                        \
  0x200C, /* Zero Width Non-Joiner */            \
  0x2028, /* Line Separator */                   \
  0x2029, /* Paragraph Separator */              \
  0x202F, /* Narrow No-Break Space */            \
  0x205F, /* Medium Mathematical Space */        \
  0x3000, /* Ideographic Space */                \
  0
    374 
// 0-terminated table of whitespace code points as wchar_t; passed as the
// trim set by the std::wstring overload of TrimWhitespace.
const wchar_t kWhitespaceWide[] = {
  WHITESPACE_UNICODE
};
// 0-terminated table of whitespace code points as char16; passed as the trim
// set by the string16 overload of TrimWhitespace.
const char16 kWhitespaceUTF16[] = {
  WHITESPACE_UNICODE
};
    381 const char kWhitespaceASCII[] = {
    382   0x09,    // <control-0009> to <control-000D>
    383   0x0A,
    384   0x0B,
    385   0x0C,
    386   0x0D,
    387   0x20,    // Space
    388   0
    389 };
    390 
// The three bytes EF BB BF (the UTF-8 encoding of the byte order mark),
// stored as a NUL-terminated string.
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
    392 
    393 template<typename STR>
    394 TrimPositions TrimStringT(const STR& input,
    395                           const typename STR::value_type trim_chars[],
    396                           TrimPositions positions,
    397                           STR* output) {
    398   // Find the edges of leading/trailing whitespace as desired.
    399   const typename STR::size_type last_char = input.length() - 1;
    400   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
    401       input.find_first_not_of(trim_chars) : 0;
    402   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
    403       input.find_last_not_of(trim_chars) : last_char;
    404 
    405   // When the string was all whitespace, report that we stripped off whitespace
    406   // from whichever position the caller was interested in.  For empty input, we
    407   // stripped no whitespace, but we still need to clear |output|.
    408   if (input.empty() ||
    409       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
    410     bool input_was_empty = input.empty();  // in case output == &input
    411     output->clear();
    412     return input_was_empty ? TRIM_NONE : positions;
    413   }
    414 
    415   // Trim the whitespace.
    416   *output =
    417       input.substr(first_good_char, last_good_char - first_good_char + 1);
    418 
    419   // Return where we trimmed from.
    420   return static_cast<TrimPositions>(
    421       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    422       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    423 }
    424 
    425 bool TrimString(const std::wstring& input,
    426                 const wchar_t trim_chars[],
    427                 std::wstring* output) {
    428   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    429 }
    430 
    431 #if !defined(WCHAR_T_IS_UTF16)
    432 bool TrimString(const string16& input,
    433                 const char16 trim_chars[],
    434                 string16* output) {
    435   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    436 }
    437 #endif
    438 
    439 bool TrimString(const std::string& input,
    440                 const char trim_chars[],
    441                 std::string* output) {
    442   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    443 }
    444 
    445 TrimPositions TrimWhitespace(const std::wstring& input,
    446                              TrimPositions positions,
    447                              std::wstring* output) {
    448   return TrimStringT(input, kWhitespaceWide, positions, output);
    449 }
    450 
    451 #if !defined(WCHAR_T_IS_UTF16)
    452 TrimPositions TrimWhitespace(const string16& input,
    453                              TrimPositions positions,
    454                              string16* output) {
    455   return TrimStringT(input, kWhitespaceUTF16, positions, output);
    456 }
    457 #endif
    458 
    459 TrimPositions TrimWhitespaceASCII(const std::string& input,
    460                                   TrimPositions positions,
    461                                   std::string* output) {
    462   return TrimStringT(input, kWhitespaceASCII, positions, output);
    463 }
    464 
    465 // This function is only for backward-compatibility.
    466 // To be removed when all callers are updated.
    467 TrimPositions TrimWhitespace(const std::string& input,
    468                              TrimPositions positions,
    469                              std::string* output) {
    470   return TrimWhitespaceASCII(input, positions, output);
    471 }
    472 
    473 template<typename STR>
    474 STR CollapseWhitespaceT(const STR& text,
    475                         bool trim_sequences_with_line_breaks) {
    476   STR result;
    477   result.resize(text.size());
    478 
    479   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    480   // will trim any leading whitespace.
    481   bool in_whitespace = true;
    482   bool already_trimmed = true;
    483 
    484   int chars_written = 0;
    485   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    486     if (IsWhitespace(*i)) {
    487       if (!in_whitespace) {
    488         // Reduce all whitespace sequences to a single space.
    489         in_whitespace = true;
    490         result[chars_written++] = L' ';
    491       }
    492       if (trim_sequences_with_line_breaks && !already_trimmed &&
    493           ((*i == '\n') || (*i == '\r'))) {
    494         // Whitespace sequences containing CR or LF are eliminated entirely.
    495         already_trimmed = true;
    496         --chars_written;
    497       }
    498     } else {
    499       // Non-whitespace chracters are copied straight across.
    500       in_whitespace = false;
    501       already_trimmed = false;
    502       result[chars_written++] = *i;
    503     }
    504   }
    505 
    506   if (in_whitespace && !already_trimmed) {
    507     // Any trailing whitespace is eliminated.
    508     --chars_written;
    509   }
    510 
    511   result.resize(chars_written);
    512   return result;
    513 }
    514 
    515 std::wstring CollapseWhitespace(const std::wstring& text,
    516                                 bool trim_sequences_with_line_breaks) {
    517   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    518 }
    519 
    520 #if !defined(WCHAR_T_IS_UTF16)
    521 string16 CollapseWhitespace(const string16& text,
    522                             bool trim_sequences_with_line_breaks) {
    523   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    524 }
    525 #endif
    526 
    527 std::string CollapseWhitespaceASCII(const std::string& text,
    528                                     bool trim_sequences_with_line_breaks) {
    529   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    530 }
    531 
    532 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
    533   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
    534     if (!IsAsciiWhitespace(*i))
    535       return false;
    536   }
    537   return true;
    538 }
    539 
    540 bool ContainsOnlyWhitespace(const string16& str) {
    541   for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
    542     if (!IsWhitespace(*i))
    543       return false;
    544   }
    545   return true;
    546 }
    547 
    548 std::string WideToASCII(const std::wstring& wide) {
    549   DCHECK(IsStringASCII(wide)) << wide;
    550   return std::string(wide.begin(), wide.end());
    551 }
    552 
    553 std::wstring ASCIIToWide(const base::StringPiece& ascii) {
    554   DCHECK(IsStringASCII(ascii)) << ascii;
    555   return std::wstring(ascii.begin(), ascii.end());
    556 }
    557 
    558 std::string UTF16ToASCII(const string16& utf16) {
    559   DCHECK(IsStringASCII(utf16)) << utf16;
    560   return std::string(utf16.begin(), utf16.end());
    561 }
    562 
    563 string16 ASCIIToUTF16(const base::StringPiece& ascii) {
    564   DCHECK(IsStringASCII(ascii)) << ascii;
    565   return string16(ascii.begin(), ascii.end());
    566 }
    567 
    568 // Latin1 is just the low range of Unicode, so we can copy directly to convert.
    569 bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
    570   std::string output;
    571   output.resize(wide.size());
    572   latin1->clear();
    573   for (size_t i = 0; i < wide.size(); i++) {
    574     if (wide[i] > 255)
    575       return false;
    576     output[i] = static_cast<char>(wide[i]);
    577   }
    578   latin1->swap(output);
    579   return true;
    580 }
    581 
    582 bool IsString8Bit(const std::wstring& str) {
    583   for (size_t i = 0; i < str.length(); i++) {
    584     if (str[i] > 255)
    585       return false;
    586   }
    587   return true;
    588 }
    589 
    590 template<class STR>
    591 static bool DoIsStringASCII(const STR& str) {
    592   for (size_t i = 0; i < str.length(); i++) {
    593     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
    594     if (c > 0x7F)
    595       return false;
    596   }
    597   return true;
    598 }
    599 
    600 bool IsStringASCII(const std::wstring& str) {
    601   return DoIsStringASCII(str);
    602 }
    603 
    604 #if !defined(WCHAR_T_IS_UTF16)
    605 bool IsStringASCII(const string16& str) {
    606   return DoIsStringASCII(str);
    607 }
    608 #endif
    609 
    610 bool IsStringASCII(const base::StringPiece& str) {
    611   return DoIsStringASCII(str);
    612 }
    613 
    614 // Helper functions that determine whether the given character begins a
    615 // UTF-8 sequence of bytes with the given length. A character satisfies
    616 // "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte
    617 // character.
    618 static inline bool IsBegin2ByteUTF8(int c) {
    619   return (c & 0xE0) == 0xC0;
    620 }
    621 static inline bool IsBegin3ByteUTF8(int c) {
    622   return (c & 0xF0) == 0xE0;
    623 }
    624 static inline bool IsBegin4ByteUTF8(int c) {
    625   return (c & 0xF8) == 0xF0;
    626 }
    627 static inline bool IsInUTF8Sequence(int c) {
    628   return (c & 0xC0) == 0x80;
    629 }
    630 
    631 // This function was copied from Mozilla, with modifications. The original code
    632 // was 'IsUTF8' in xpcom/string/src/nsReadableUtils.cpp. The license block for
    633 // this function is:
    634 //   This function subject to the Mozilla Public License Version
    635 //   1.1 (the "License"); you may not use this code except in compliance with
    636 //   the License. You may obtain a copy of the License at
    637 //   http://www.mozilla.org/MPL/
    638 //
    639 //   Software distributed under the License is distributed on an "AS IS" basis,
    640 //   WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    641 //   for the specific language governing rights and limitations under the
    642 //   License.
    643 //
    644 //   The Original Code is mozilla.org code.
    645 //
    646 //   The Initial Developer of the Original Code is
    647 //   Netscape Communications Corporation.
    648 //   Portions created by the Initial Developer are Copyright (C) 2000
    649 //   the Initial Developer. All Rights Reserved.
    650 //
    651 //   Contributor(s):
    652 //     Scott Collins <scc (at) mozilla.org> (original author)
    653 //
    654 // This is a template so that it can be run on wide and 8-bit strings. We want
    655 // to run it on wide strings when we have input that we think may have
    656 // originally been UTF-8, but has been converted to wide characters because
    657 // that's what we (and Windows) use internally.
    658 template<typename CHAR>
    659 static bool IsStringUTF8T(const CHAR* str, size_t length) {
    660   bool overlong = false;
    661   bool surrogate = false;
    662   bool nonchar = false;
    663 
    664   // overlong byte upper bound
    665   typename ToUnsigned<CHAR>::Unsigned olupper = 0;
    666 
    667   // surrogate byte lower bound
    668   typename ToUnsigned<CHAR>::Unsigned slower = 0;
    669 
    670   // incremented when inside a multi-byte char to indicate how many bytes
    671   // are left in the sequence
    672   int positions_left = 0;
    673 
    674   for (uintptr_t i = 0; i < length; i++) {
    675     // This whole function assume an unsigned value so force its conversion to
    676     // an unsigned value.
    677     typename ToUnsigned<CHAR>::Unsigned c = str[i];
    678     if (c < 0x80)
    679       continue;  // ASCII
    680 
    681     if (c <= 0xC1) {
    682       // [80-BF] where not expected, [C0-C1] for overlong
    683       return false;
    684     } else if (IsBegin2ByteUTF8(c)) {
    685       positions_left = 1;
    686     } else if (IsBegin3ByteUTF8(c)) {
    687       positions_left = 2;
    688       if (c == 0xE0) {
    689         // to exclude E0[80-9F][80-BF]
    690         overlong = true;
    691         olupper = 0x9F;
    692       } else if (c == 0xED) {
    693         // ED[A0-BF][80-BF]: surrogate codepoint
    694         surrogate = true;
    695         slower = 0xA0;
    696       } else if (c == 0xEF) {
    697         // EF BF [BE-BF] : non-character
    698         // TODO(jungshik): EF B7 [90-AF] should be checked as well.
    699         nonchar = true;
    700       }
    701     } else if (c <= 0xF4) {
    702       positions_left = 3;
    703       nonchar = true;
    704       if (c == 0xF0) {
    705         // to exclude F0[80-8F][80-BF]{2}
    706         overlong = true;
    707         olupper = 0x8F;
    708       } else if (c == 0xF4) {
    709         // to exclude F4[90-BF][80-BF]
    710         // actually not surrogates but codepoints beyond 0x10FFFF
    711         surrogate = true;
    712         slower = 0x90;
    713       }
    714     } else {
    715       return false;
    716     }
    717 
    718     // eat the rest of this multi-byte character
    719     while (positions_left) {
    720       positions_left--;
    721       i++;
    722       c = str[i];
    723       if (!c)
    724         return false;  // end of string but not end of character sequence
    725 
    726       // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
    727       if (nonchar && ((!positions_left && c < 0xBE) ||
    728                       (positions_left == 1 && c != 0xBF) ||
    729                       (positions_left == 2 && 0x0F != (0x0F & c) ))) {
    730         nonchar = false;
    731       }
    732       if (!IsInUTF8Sequence(c) || (overlong && c <= olupper) ||
    733           (surrogate && slower <= c) || (nonchar && !positions_left) ) {
    734         return false;
    735       }
    736       overlong = surrogate = false;
    737     }
    738   }
    739   return true;
    740 }
    741 
    742 bool IsStringUTF8(const std::string& str) {
    743   return IsStringUTF8T(str.data(), str.length());
    744 }
    745 
    746 bool IsStringWideUTF8(const std::wstring& str) {
    747   return IsStringUTF8T(str.data(), str.length());
    748 }
    749 
    750 template<typename Iter>
    751 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
    752                                           Iter a_end,
    753                                           const char* b) {
    754   for (Iter it = a_begin; it != a_end; ++it, ++b) {
    755     if (!*b || ToLowerASCII(*it) != *b)
    756       return false;
    757   }
    758   return *b == 0;
    759 }
    760 
    761 // Front-ends for LowerCaseEqualsASCII.
    762 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
    763   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    764 }
    765 
    766 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
    767   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    768 }
    769 
    770 #if !defined(WCHAR_T_IS_UTF16)
    771 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
    772   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    773 }
    774 #endif
    775 
    776 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
    777                           std::string::const_iterator a_end,
    778                           const char* b) {
    779   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    780 }
    781 
    782 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
    783                           std::wstring::const_iterator a_end,
    784                           const char* b) {
    785   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    786 }
    787 
    788 #if !defined(WCHAR_T_IS_UTF16)
    789 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
    790                           string16::const_iterator a_end,
    791                           const char* b) {
    792   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    793 }
    794 #endif
    795 
    796 bool LowerCaseEqualsASCII(const char* a_begin,
    797                           const char* a_end,
    798                           const char* b) {
    799   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    800 }
    801 
    802 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
    803                           const wchar_t* a_end,
    804                           const char* b) {
    805   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    806 }
    807 
    808 #if !defined(WCHAR_T_IS_UTF16)
    809 bool LowerCaseEqualsASCII(const char16* a_begin,
    810                           const char16* a_end,
    811                           const char* b) {
    812   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    813 }
    814 #endif
    815 
    816 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
    817   if (a.length() != b.length())
    818     return false;
    819   return std::equal(b.begin(), b.end(), a.begin());
    820 }
    821 
    822 bool StartsWithASCII(const std::string& str,
    823                      const std::string& search,
    824                      bool case_sensitive) {
    825   if (case_sensitive)
    826     return str.compare(0, search.length(), search) == 0;
    827   else
    828     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
    829 }
    830 
    831 template <typename STR>
    832 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
    833   if (case_sensitive) {
    834     return str.compare(0, search.length(), search) == 0;
    835   } else {
    836     if (search.size() > str.size())
    837       return false;
    838     return std::equal(search.begin(), search.end(), str.begin(),
    839                       CaseInsensitiveCompare<typename STR::value_type>());
    840   }
    841 }
    842 
    843 bool StartsWith(const std::wstring& str, const std::wstring& search,
    844                 bool case_sensitive) {
    845   return StartsWithT(str, search, case_sensitive);
    846 }
    847 
    848 #if !defined(WCHAR_T_IS_UTF16)
    849 bool StartsWith(const string16& str, const string16& search,
    850                 bool case_sensitive) {
    851   return StartsWithT(str, search, case_sensitive);
    852 }
    853 #endif
    854 
    855 template <typename STR>
    856 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
    857   typename STR::size_type str_length = str.length();
    858   typename STR::size_type search_length = search.length();
    859   if (search_length > str_length)
    860     return false;
    861   if (case_sensitive) {
    862     return str.compare(str_length - search_length, search_length, search) == 0;
    863   } else {
    864     return std::equal(search.begin(), search.end(),
    865                       str.begin() + (str_length - search_length),
    866                       CaseInsensitiveCompare<typename STR::value_type>());
    867   }
    868 }
    869 
    870 bool EndsWith(const std::string& str, const std::string& search,
    871               bool case_sensitive) {
    872   return EndsWithT(str, search, case_sensitive);
    873 }
    874 
    875 bool EndsWith(const std::wstring& str, const std::wstring& search,
    876               bool case_sensitive) {
    877   return EndsWithT(str, search, case_sensitive);
    878 }
    879 
    880 #if !defined(WCHAR_T_IS_UTF16)
    881 bool EndsWith(const string16& str, const string16& search,
    882               bool case_sensitive) {
    883   return EndsWithT(str, search, case_sensitive);
    884 }
    885 #endif
    886 
    887 DataUnits GetByteDisplayUnits(int64 bytes) {
    888   // The byte thresholds at which we display amounts.  A byte count is displayed
    889   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
    890   // This must match the DataUnits enum.
    891   static const int64 kUnitThresholds[] = {
    892     0,              // DATA_UNITS_BYTE,
    893     3*1024,         // DATA_UNITS_KIBIBYTE,
    894     2*1024*1024,    // DATA_UNITS_MEBIBYTE,
    895     1024*1024*1024  // DATA_UNITS_GIBIBYTE,
    896   };
    897 
    898   if (bytes < 0) {
    899     NOTREACHED() << "Negative bytes value";
    900     return DATA_UNITS_BYTE;
    901   }
    902 
    903   int unit_index = arraysize(kUnitThresholds);
    904   while (--unit_index > 0) {
    905     if (bytes >= kUnitThresholds[unit_index])
    906       break;
    907   }
    908 
    909   DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);
    910   return DataUnits(unit_index);
    911 }
    912 
    913 // TODO(mpcomplete): deal with locale
    914 // Byte suffixes.  This must match the DataUnits enum.
    915 static const wchar_t* const kByteStrings[] = {
    916   L"B",
    917   L"kB",
    918   L"MB",
    919   L"GB"
    920 };
    921 
    922 static const wchar_t* const kSpeedStrings[] = {
    923   L"B/s",
    924   L"kB/s",
    925   L"MB/s",
    926   L"GB/s"
    927 };
    928 
    929 std::wstring FormatBytesInternal(int64 bytes,
    930                                  DataUnits units,
    931                                  bool show_units,
    932                                  const wchar_t* const* suffix) {
    933   if (bytes < 0) {
    934     NOTREACHED() << "Negative bytes value";
    935     return std::wstring();
    936   }
    937 
    938   DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);
    939 
    940   // Put the quantity in the right units.
    941   double unit_amount = static_cast<double>(bytes);
    942   for (int i = 0; i < units; ++i)
    943     unit_amount /= 1024.0;
    944 
    945   wchar_t tmp[64];
    946   // If the first decimal digit is 0, don't show it.
    947   double int_part;
    948   double fractional_part = modf(unit_amount, &int_part);
    949   modf(fractional_part * 10, &int_part);
    950   if (int_part == 0) {
    951     base::swprintf(tmp, arraysize(tmp),
    952                    L"%lld", static_cast<int64>(unit_amount));
    953   } else {
    954     base::swprintf(tmp, arraysize(tmp), L"%.1lf", unit_amount);
    955   }
    956 
    957   std::wstring ret(tmp);
    958   if (show_units) {
    959     ret += L" ";
    960     ret += suffix[units];
    961   }
    962 
    963   return ret;
    964 }
    965 
    966 std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) {
    967   return FormatBytesInternal(bytes, units, show_units, kByteStrings);
    968 }
    969 
    970 std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
    971   return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
    972 }
    973 
    974 template<class StringType>
    975 void DoReplaceSubstringsAfterOffset(StringType* str,
    976                                     typename StringType::size_type start_offset,
    977                                     const StringType& find_this,
    978                                     const StringType& replace_with,
    979                                     bool replace_all) {
    980   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    981     return;
    982 
    983   DCHECK(!find_this.empty());
    984   for (typename StringType::size_type offs(str->find(find_this, start_offset));
    985       offs != StringType::npos; offs = str->find(find_this, offs)) {
    986     str->replace(offs, find_this.length(), replace_with);
    987     offs += replace_with.length();
    988 
    989     if (!replace_all)
    990       break;
    991   }
    992 }
    993 
    994 void ReplaceFirstSubstringAfterOffset(string16* str,
    995                                       string16::size_type start_offset,
    996                                       const string16& find_this,
    997                                       const string16& replace_with) {
    998   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    999                                  false);  // replace first instance
   1000 }
   1001 
   1002 void ReplaceFirstSubstringAfterOffset(std::string* str,
   1003                                       std::string::size_type start_offset,
   1004                                       const std::string& find_this,
   1005                                       const std::string& replace_with) {
   1006   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
   1007                                  false);  // replace first instance
   1008 }
   1009 
   1010 void ReplaceSubstringsAfterOffset(string16* str,
   1011                                   string16::size_type start_offset,
   1012                                   const string16& find_this,
   1013                                   const string16& replace_with) {
   1014   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
   1015                                  true);  // replace all instances
   1016 }
   1017 
   1018 void ReplaceSubstringsAfterOffset(std::string* str,
   1019                                   std::string::size_type start_offset,
   1020                                   const std::string& find_this,
   1021                                   const std::string& replace_with) {
   1022   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
   1023                                  true);  // replace all instances
   1024 }
   1025 
   1026 // Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
   1027 // is the size of the buffer. These return the number of characters in the
   1028 // formatted string excluding the NUL terminator. If the buffer is not
   1029 // large enough to accommodate the formatted string without truncation, they
   1030 // return the number of characters that would be in the fully-formatted string
   1031 // (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
   1032 inline int vsnprintfT(char* buffer,
   1033                       size_t buf_size,
   1034                       const char* format,
   1035                       va_list argptr) {
   1036   return base::vsnprintf(buffer, buf_size, format, argptr);
   1037 }
   1038 
   1039 inline int vsnprintfT(wchar_t* buffer,
   1040                       size_t buf_size,
   1041                       const wchar_t* format,
   1042                       va_list argptr) {
   1043   return base::vswprintf(buffer, buf_size, format, argptr);
   1044 }
   1045 
   1046 // Templatized backend for StringPrintF/StringAppendF. This does not finalize
   1047 // the va_list, the caller is expected to do that.
   1048 template <class StringType>
   1049 static void StringAppendVT(StringType* dst,
   1050                            const typename StringType::value_type* format,
   1051                            va_list ap) {
   1052   // First try with a small fixed size buffer.
   1053   // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
   1054   // and StringUtilTest.StringPrintfBounds.
   1055   typename StringType::value_type stack_buf[1024];
   1056 
   1057   va_list ap_copy;
   1058   GG_VA_COPY(ap_copy, ap);
   1059 
   1060 #if !defined(OS_WIN)
   1061   errno = 0;
   1062 #endif
   1063   int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy);
   1064   va_end(ap_copy);
   1065 
   1066   if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
   1067     // It fit.
   1068     dst->append(stack_buf, result);
   1069     return;
   1070   }
   1071 
   1072   // Repeatedly increase buffer size until it fits.
   1073   int mem_length = arraysize(stack_buf);
   1074   while (true) {
   1075     if (result < 0) {
   1076 #if !defined(OS_WIN)
   1077       // On Windows, vsnprintfT always returns the number of characters in a
   1078       // fully-formatted string, so if we reach this point, something else is
   1079       // wrong and no amount of buffer-doubling is going to fix it.
   1080       if (errno != 0 && errno != EOVERFLOW)
   1081 #endif
   1082       {
   1083         // If an error other than overflow occurred, it's never going to work.
   1084         DLOG(WARNING) << "Unable to printf the requested string due to error.";
   1085         return;
   1086       }
   1087       // Try doubling the buffer size.
   1088       mem_length *= 2;
   1089     } else {
   1090       // We need exactly "result + 1" characters.
   1091       mem_length = result + 1;
   1092     }
   1093 
   1094     if (mem_length > 32 * 1024 * 1024) {
   1095       // That should be plenty, don't try anything larger.  This protects
   1096       // against huge allocations when using vsnprintfT implementations that
   1097       // return -1 for reasons other than overflow without setting errno.
   1098       DLOG(WARNING) << "Unable to printf the requested string due to size.";
   1099       return;
   1100     }
   1101 
   1102     std::vector<typename StringType::value_type> mem_buf(mem_length);
   1103 
   1104     // NOTE: You can only use a va_list once.  Since we're in a while loop, we
   1105     // need to make a new copy each time so we don't use up the original.
   1106     GG_VA_COPY(ap_copy, ap);
   1107     result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
   1108     va_end(ap_copy);
   1109 
   1110     if ((result >= 0) && (result < mem_length)) {
   1111       // It fit.
   1112       dst->append(&mem_buf[0], result);
   1113       return;
   1114     }
   1115   }
   1116 }
   1117 
   1118 namespace {
   1119 
   1120 template <typename STR, typename INT, typename UINT, bool NEG>
   1121 struct IntToStringT {
   1122   // This is to avoid a compiler warning about unary minus on unsigned type.
   1123   // For example, say you had the following code:
   1124   //   template <typename INT>
   1125   //   INT abs(INT value) { return value < 0 ? -value : value; }
   1126   // Even though if INT is unsigned, it's impossible for value < 0, so the
   1127   // unary minus will never be taken, the compiler will still generate a
   1128   // warning.  We do a little specialization dance...
   1129   template <typename INT2, typename UINT2, bool NEG2>
   1130   struct ToUnsignedT { };
   1131 
   1132   template <typename INT2, typename UINT2>
   1133   struct ToUnsignedT<INT2, UINT2, false> {
   1134     static UINT2 ToUnsigned(INT2 value) {
   1135       return static_cast<UINT2>(value);
   1136     }
   1137   };
   1138 
   1139   template <typename INT2, typename UINT2>
   1140   struct ToUnsignedT<INT2, UINT2, true> {
   1141     static UINT2 ToUnsigned(INT2 value) {
   1142       return static_cast<UINT2>(value < 0 ? -value : value);
   1143     }
   1144   };
   1145 
   1146   static STR IntToString(INT value) {
   1147     // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
   1148     // So round up to allocate 3 output characters per byte, plus 1 for '-'.
   1149     const int kOutputBufSize = 3 * sizeof(INT) + 1;
   1150 
   1151     // Allocate the whole string right away, we will right back to front, and
   1152     // then return the substr of what we ended up using.
   1153     STR outbuf(kOutputBufSize, 0);
   1154 
   1155     bool is_neg = value < 0;
   1156     // Even though is_neg will never be true when INT is parameterized as
   1157     // unsigned, even the presence of the unary operation causes a warning.
   1158     UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
   1159 
   1160     for (typename STR::iterator it = outbuf.end();;) {
   1161       --it;
   1162       DCHECK(it != outbuf.begin());
   1163       *it = static_cast<typename STR::value_type>((res % 10) + '0');
   1164       res /= 10;
   1165 
   1166       // We're done..
   1167       if (res == 0) {
   1168         if (is_neg) {
   1169           --it;
   1170           DCHECK(it != outbuf.begin());
   1171           *it = static_cast<typename STR::value_type>('-');
   1172         }
   1173         return STR(it, outbuf.end());
   1174       }
   1175     }
   1176     NOTREACHED();
   1177     return STR();
   1178   }
   1179 };
   1180 
   1181 }
   1182 
   1183 std::string IntToString(int value) {
   1184   return IntToStringT<std::string, int, unsigned int, true>::
   1185       IntToString(value);
   1186 }
   1187 std::wstring IntToWString(int value) {
   1188   return IntToStringT<std::wstring, int, unsigned int, true>::
   1189       IntToString(value);
   1190 }
   1191 string16 IntToString16(int value) {
   1192   return IntToStringT<string16, int, unsigned int, true>::
   1193       IntToString(value);
   1194 }
   1195 std::string UintToString(unsigned int value) {
   1196   return IntToStringT<std::string, unsigned int, unsigned int, false>::
   1197       IntToString(value);
   1198 }
   1199 std::wstring UintToWString(unsigned int value) {
   1200   return IntToStringT<std::wstring, unsigned int, unsigned int, false>::
   1201       IntToString(value);
   1202 }
   1203 string16 UintToString16(unsigned int value) {
   1204   return IntToStringT<string16, unsigned int, unsigned int, false>::
   1205       IntToString(value);
   1206 }
   1207 std::string Int64ToString(int64 value) {
   1208   return IntToStringT<std::string, int64, uint64, true>::
   1209       IntToString(value);
   1210 }
   1211 std::wstring Int64ToWString(int64 value) {
   1212   return IntToStringT<std::wstring, int64, uint64, true>::
   1213       IntToString(value);
   1214 }
   1215 std::string Uint64ToString(uint64 value) {
   1216   return IntToStringT<std::string, uint64, uint64, false>::
   1217       IntToString(value);
   1218 }
   1219 std::wstring Uint64ToWString(uint64 value) {
   1220   return IntToStringT<std::wstring, uint64, uint64, false>::
   1221       IntToString(value);
   1222 }
   1223 
   1224 std::string DoubleToString(double value) {
   1225   // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
   1226   char buffer[32];
   1227   dmg_fp::g_fmt(buffer, value);
   1228   return std::string(buffer);
   1229 }
   1230 
   1231 std::wstring DoubleToWString(double value) {
   1232   return ASCIIToWide(DoubleToString(value));
   1233 }
   1234 
   1235 void StringAppendV(std::string* dst, const char* format, va_list ap) {
   1236   StringAppendVT(dst, format, ap);
   1237 }
   1238 
   1239 void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
   1240   StringAppendVT(dst, format, ap);
   1241 }
   1242 
   1243 std::string StringPrintf(const char* format, ...) {
   1244   va_list ap;
   1245   va_start(ap, format);
   1246   std::string result;
   1247   StringAppendV(&result, format, ap);
   1248   va_end(ap);
   1249   return result;
   1250 }
   1251 
   1252 std::wstring StringPrintf(const wchar_t* format, ...) {
   1253   va_list ap;
   1254   va_start(ap, format);
   1255   std::wstring result;
   1256   StringAppendV(&result, format, ap);
   1257   va_end(ap);
   1258   return result;
   1259 }
   1260 
   1261 std::string StringPrintV(const char* format, va_list ap) {
   1262   std::string result;
   1263   StringAppendV(&result, format, ap);
   1264   return result;
   1265 }
   1266 
   1267 const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
   1268   va_list ap;
   1269   va_start(ap, format);
   1270   dst->clear();
   1271   StringAppendV(dst, format, ap);
   1272   va_end(ap);
   1273   return *dst;
   1274 }
   1275 
   1276 const std::wstring& SStringPrintf(std::wstring* dst,
   1277                                   const wchar_t* format, ...) {
   1278   va_list ap;
   1279   va_start(ap, format);
   1280   dst->clear();
   1281   StringAppendV(dst, format, ap);
   1282   va_end(ap);
   1283   return *dst;
   1284 }
   1285 
   1286 void StringAppendF(std::string* dst, const char* format, ...) {
   1287   va_list ap;
   1288   va_start(ap, format);
   1289   StringAppendV(dst, format, ap);
   1290   va_end(ap);
   1291 }
   1292 
   1293 void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
   1294   va_list ap;
   1295   va_start(ap, format);
   1296   StringAppendV(dst, format, ap);
   1297   va_end(ap);
   1298 }
   1299 
   1300 template<typename STR>
   1301 static void SplitStringT(const STR& str,
   1302                          const typename STR::value_type s,
   1303                          bool trim_whitespace,
   1304                          std::vector<STR>* r) {
   1305   size_t last = 0;
   1306   size_t i;
   1307   size_t c = str.size();
   1308   for (i = 0; i <= c; ++i) {
   1309     if (i == c || str[i] == s) {
   1310       size_t len = i - last;
   1311       STR tmp = str.substr(last, len);
   1312       if (trim_whitespace) {
   1313         STR t_tmp;
   1314         TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
   1315         r->push_back(t_tmp);
   1316       } else {
   1317         r->push_back(tmp);
   1318       }
   1319       last = i + 1;
   1320     }
   1321   }
   1322 }
   1323 
   1324 void SplitString(const std::wstring& str,
   1325                  wchar_t s,
   1326                  std::vector<std::wstring>* r) {
   1327   SplitStringT(str, s, true, r);
   1328 }
   1329 
   1330 #if !defined(WCHAR_T_IS_UTF16)
   1331 void SplitString(const string16& str,
   1332                  char16 s,
   1333                  std::vector<string16>* r) {
   1334   SplitStringT(str, s, true, r);
   1335 }
   1336 #endif
   1337 
   1338 void SplitString(const std::string& str,
   1339                  char s,
   1340                  std::vector<std::string>* r) {
   1341   SplitStringT(str, s, true, r);
   1342 }
   1343 
   1344 void SplitStringDontTrim(const std::wstring& str,
   1345                          wchar_t s,
   1346                          std::vector<std::wstring>* r) {
   1347   SplitStringT(str, s, false, r);
   1348 }
   1349 
   1350 #if !defined(WCHAR_T_IS_UTF16)
   1351 void SplitStringDontTrim(const string16& str,
   1352                          char16 s,
   1353                          std::vector<string16>* r) {
   1354   SplitStringT(str, s, false, r);
   1355 }
   1356 #endif
   1357 
   1358 void SplitStringDontTrim(const std::string& str,
   1359                          char s,
   1360                          std::vector<std::string>* r) {
   1361   SplitStringT(str, s, false, r);
   1362 }
   1363 
   1364 template<typename STR>
   1365 static size_t TokenizeT(const STR& str,
   1366                         const STR& delimiters,
   1367                         std::vector<STR>* tokens) {
   1368   tokens->clear();
   1369 
   1370   typename STR::size_type start = str.find_first_not_of(delimiters);
   1371   while (start != STR::npos) {
   1372     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
   1373     if (end == STR::npos) {
   1374       tokens->push_back(str.substr(start));
   1375       break;
   1376     } else {
   1377       tokens->push_back(str.substr(start, end - start));
   1378       start = str.find_first_not_of(delimiters, end + 1);
   1379     }
   1380   }
   1381 
   1382   return tokens->size();
   1383 }
   1384 
   1385 size_t Tokenize(const std::wstring& str,
   1386                 const std::wstring& delimiters,
   1387                 std::vector<std::wstring>* tokens) {
   1388   return TokenizeT(str, delimiters, tokens);
   1389 }
   1390 
   1391 #if !defined(WCHAR_T_IS_UTF16)
   1392 size_t Tokenize(const string16& str,
   1393                 const string16& delimiters,
   1394                 std::vector<string16>* tokens) {
   1395   return TokenizeT(str, delimiters, tokens);
   1396 }
   1397 #endif
   1398 
   1399 size_t Tokenize(const std::string& str,
   1400                 const std::string& delimiters,
   1401                 std::vector<std::string>* tokens) {
   1402   return TokenizeT(str, delimiters, tokens);
   1403 }
   1404 
   1405 template<typename STR>
   1406 static STR JoinStringT(const std::vector<STR>& parts,
   1407                        typename STR::value_type sep) {
   1408   if (parts.size() == 0) return STR();
   1409 
   1410   STR result(parts[0]);
   1411   typename std::vector<STR>::const_iterator iter = parts.begin();
   1412   ++iter;
   1413 
   1414   for (; iter != parts.end(); ++iter) {
   1415     result += sep;
   1416     result += *iter;
   1417   }
   1418 
   1419   return result;
   1420 }
   1421 
   1422 std::string JoinString(const std::vector<std::string>& parts, char sep) {
   1423   return JoinStringT(parts, sep);
   1424 }
   1425 
   1426 #if !defined(WCHAR_T_IS_UTF16)
   1427 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
   1428   return JoinStringT(parts, sep);
   1429 }
   1430 #endif
   1431 
   1432 std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t sep) {
   1433   return JoinStringT(parts, sep);
   1434 }
   1435 
   1436 template<typename STR>
   1437 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
   1438   const size_t length = str.length();
   1439   if (!length)
   1440     return;
   1441 
   1442   bool last_was_ws = false;
   1443   size_t last_non_ws_start = 0;
   1444   for (size_t i = 0; i < length; ++i) {
   1445     switch (str[i]) {
   1446       // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
   1447       case L' ':
   1448       case L'\t':
   1449       case L'\xA':
   1450       case L'\xB':
   1451       case L'\xC':
   1452       case L'\xD':
   1453         if (!last_was_ws) {
   1454           if (i > 0) {
   1455             result->push_back(
   1456                 str.substr(last_non_ws_start, i - last_non_ws_start));
   1457           }
   1458           last_was_ws = true;
   1459         }
   1460         break;
   1461 
   1462       default:  // Not a space character.
   1463         if (last_was_ws) {
   1464           last_was_ws = false;
   1465           last_non_ws_start = i;
   1466         }
   1467         break;
   1468     }
   1469   }
   1470   if (!last_was_ws) {
   1471     result->push_back(
   1472         str.substr(last_non_ws_start, length - last_non_ws_start));
   1473   }
   1474 }
   1475 
   1476 void SplitStringAlongWhitespace(const std::wstring& str,
   1477                                 std::vector<std::wstring>* result) {
   1478   SplitStringAlongWhitespaceT(str, result);
   1479 }
   1480 
   1481 #if !defined(WCHAR_T_IS_UTF16)
   1482 void SplitStringAlongWhitespace(const string16& str,
   1483                                 std::vector<string16>* result) {
   1484   SplitStringAlongWhitespaceT(str, result);
   1485 }
   1486 #endif
   1487 
   1488 void SplitStringAlongWhitespace(const std::string& str,
   1489                                 std::vector<std::string>* result) {
   1490   SplitStringAlongWhitespaceT(str, result);
   1491 }
   1492 
   1493 template<class FormatStringType, class OutStringType>
   1494 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
   1495     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
   1496   size_t substitutions = subst.size();
   1497   DCHECK(substitutions < 10);
   1498 
   1499   size_t sub_length = 0;
   1500   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
   1501        iter != subst.end(); ++iter) {
   1502     sub_length += (*iter).length();
   1503   }
   1504 
   1505   OutStringType formatted;
   1506   formatted.reserve(format_string.length() + sub_length);
   1507 
   1508   std::vector<ReplacementOffset> r_offsets;
   1509   for (typename FormatStringType::const_iterator i = format_string.begin();
   1510        i != format_string.end(); ++i) {
   1511     if ('$' == *i) {
   1512       if (i + 1 != format_string.end()) {
   1513         ++i;
   1514         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
   1515         if ('$' == *i) {
   1516           formatted.push_back('$');
   1517         } else {
   1518           uintptr_t index = *i - '1';
   1519           if (offsets) {
   1520             ReplacementOffset r_offset(index,
   1521                 static_cast<int>(formatted.size()));
   1522             r_offsets.insert(std::lower_bound(r_offsets.begin(),
   1523                 r_offsets.end(), r_offset,
   1524                 &CompareParameter),
   1525                 r_offset);
   1526           }
   1527           if (index < substitutions)
   1528             formatted.append(subst.at(index));
   1529         }
   1530       }
   1531     } else {
   1532       formatted.push_back(*i);
   1533     }
   1534   }
   1535   if (offsets) {
   1536     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
   1537         i != r_offsets.end(); ++i) {
   1538       offsets->push_back(i->offset);
   1539     }
   1540   }
   1541   return formatted;
   1542 }
   1543 
   1544 string16 ReplaceStringPlaceholders(const string16& format_string,
   1545                                    const std::vector<string16>& subst,
   1546                                    std::vector<size_t>* offsets) {
   1547   return DoReplaceStringPlaceholders(format_string, subst, offsets);
   1548 }
   1549 
   1550 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
   1551                                       const std::vector<std::string>& subst,
   1552                                       std::vector<size_t>* offsets) {
   1553   return DoReplaceStringPlaceholders(format_string, subst, offsets);
   1554 }
   1555 
   1556 string16 ReplaceStringPlaceholders(const string16& format_string,
   1557                                    const string16& a,
   1558                                    size_t* offset) {
   1559   std::vector<size_t> offsets;
   1560   std::vector<string16> subst;
   1561   subst.push_back(a);
   1562   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
   1563 
   1564   DCHECK(offsets.size() == 1);
   1565   if (offset) {
   1566     *offset = offsets[0];
   1567   }
   1568   return result;
   1569 }
   1570 
   1571 template <class CHAR>
   1572 static bool IsWildcard(CHAR character) {
   1573   return character == '*' || character == '?';
   1574 }
   1575 
   1576 // Move the strings pointers to the point where they start to differ.
   1577 template <class CHAR>
   1578 static void EatSameChars(const CHAR** pattern, const CHAR** string) {
   1579   bool escaped = false;
   1580   while (**pattern && **string) {
   1581     if (!escaped && IsWildcard(**pattern)) {
   1582       // We don't want to match wildcard here, except if it's escaped.
   1583       return;
   1584     }
   1585 
   1586     // Check if the escapement char is found. If so, skip it and move to the
   1587     // next character.
   1588     if (!escaped && **pattern == L'\\') {
   1589       escaped = true;
   1590       (*pattern)++;
   1591       continue;
   1592     }
   1593 
   1594     // Check if the chars match, if so, increment the ptrs.
   1595     if (**pattern == **string) {
   1596       (*pattern)++;
   1597       (*string)++;
   1598     } else {
   1599       // Uh ho, it did not match, we are done. If the last char was an
   1600       // escapement, that means that it was an error to advance the ptr here,
   1601       // let's put it back where it was. This also mean that the MatchPattern
   1602       // function will return false because if we can't match an escape char
   1603       // here, then no one will.
   1604       if (escaped) {
   1605         (*pattern)--;
   1606       }
   1607       return;
   1608     }
   1609 
   1610     escaped = false;
   1611   }
   1612 }
   1613 
   1614 template <class CHAR>
   1615 static void EatWildcard(const CHAR** pattern) {
   1616   while (**pattern) {
   1617     if (!IsWildcard(**pattern))
   1618       return;
   1619     (*pattern)++;
   1620   }
   1621 }
   1622 
   1623 template <class CHAR>
   1624 static bool MatchPatternT(const CHAR* eval, const CHAR* pattern, int depth) {
   1625   const int kMaxDepth = 16;
   1626   if (depth > kMaxDepth)
   1627     return false;
   1628 
   1629   // Eat all the matching chars.
   1630   EatSameChars(&pattern, &eval);
   1631 
   1632   // If the string is empty, then the pattern must be empty too, or contains
   1633   // only wildcards.
   1634   if (*eval == 0) {
   1635     EatWildcard(&pattern);
   1636     if (*pattern)
   1637       return false;
   1638     return true;
   1639   }
   1640 
   1641   // Pattern is empty but not string, this is not a match.
   1642   if (*pattern == 0)
   1643     return false;
   1644 
   1645   // If this is a question mark, then we need to compare the rest with
   1646   // the current string or the string with one character eaten.
   1647   if (pattern[0] == '?') {
   1648     if (MatchPatternT(eval, pattern + 1, depth + 1) ||
   1649         MatchPatternT(eval + 1, pattern + 1, depth + 1))
   1650       return true;
   1651   }
   1652 
   1653   // This is a *, try to match all the possible substrings with the remainder
   1654   // of the pattern.
   1655   if (pattern[0] == '*') {
   1656     while (*eval) {
   1657       if (MatchPatternT(eval, pattern + 1, depth + 1))
   1658         return true;
   1659       eval++;
   1660     }
   1661 
   1662     // We reached the end of the string, let see if the pattern contains only
   1663     // wildcards.
   1664     if (*eval == 0) {
   1665       EatWildcard(&pattern);
   1666       if (*pattern)
   1667         return false;
   1668       return true;
   1669     }
   1670   }
   1671 
   1672   return false;
   1673 }
   1674 
   1675 bool MatchPatternWide(const std::wstring& eval, const std::wstring& pattern) {
   1676   return MatchPatternT(eval.c_str(), pattern.c_str(), 0);
   1677 }
   1678 
   1679 bool MatchPatternASCII(const std::string& eval, const std::string& pattern) {
   1680   DCHECK(IsStringASCII(eval) && IsStringASCII(pattern));
   1681   return MatchPatternT(eval.c_str(), pattern.c_str(), 0);
   1682 }
   1683 
   1684 bool StringToInt(const std::string& input, int* output) {
   1685   return StringToNumber<StringToIntTraits>(input, output);
   1686 }
   1687 
   1688 bool StringToInt(const string16& input, int* output) {
   1689   return StringToNumber<String16ToIntTraits>(input, output);
   1690 }
   1691 
   1692 bool StringToInt64(const std::string& input, int64* output) {
   1693   return StringToNumber<StringToInt64Traits>(input, output);
   1694 }
   1695 
   1696 bool StringToInt64(const string16& input, int64* output) {
   1697   return StringToNumber<String16ToInt64Traits>(input, output);
   1698 }
   1699 
   1700 bool HexStringToInt(const std::string& input, int* output) {
   1701   return StringToNumber<HexStringToIntTraits>(input, output);
   1702 }
   1703 
   1704 bool HexStringToInt(const string16& input, int* output) {
   1705   return StringToNumber<HexString16ToIntTraits>(input, output);
   1706 }
   1707 
   1708 namespace {
   1709 
   1710 template<class CHAR>
   1711 bool HexDigitToIntT(const CHAR digit, uint8* val) {
   1712   if (digit >= '0' && digit <= '9')
   1713     *val = digit - '0';
   1714   else if (digit >= 'a' && digit <= 'f')
   1715     *val = 10 + digit - 'a';
   1716   else if (digit >= 'A' && digit <= 'F')
   1717     *val = 10 + digit - 'A';
   1718   else
   1719     return false;
   1720   return true;
   1721 }
   1722 
   1723 template<typename STR>
   1724 bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
   1725   DCHECK(output->size() == 0);
   1726   size_t count = input.size();
   1727   if (count == 0 || (count % 2) != 0)
   1728     return false;
   1729   for (uintptr_t i = 0; i < count / 2; ++i) {
   1730     uint8 msb = 0;  // most significant 4 bits
   1731     uint8 lsb = 0;  // least significant 4 bits
   1732     if (!HexDigitToIntT(input[i * 2], &msb) ||
   1733         !HexDigitToIntT(input[i * 2 + 1], &lsb))
   1734       return false;
   1735     output->push_back((msb << 4) | lsb);
   1736   }
   1737   return true;
   1738 }
   1739 
   1740 }  // namespace
   1741 
   1742 bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
   1743   return HexStringToBytesT(input, output);
   1744 }
   1745 
   1746 bool HexStringToBytes(const string16& input, std::vector<uint8>* output) {
   1747   return HexStringToBytesT(input, output);
   1748 }
   1749 
   1750 int StringToInt(const std::string& value) {
   1751   int result;
   1752   StringToInt(value, &result);
   1753   return result;
   1754 }
   1755 
   1756 int StringToInt(const string16& value) {
   1757   int result;
   1758   StringToInt(value, &result);
   1759   return result;
   1760 }
   1761 
   1762 int64 StringToInt64(const std::string& value) {
   1763   int64 result;
   1764   StringToInt64(value, &result);
   1765   return result;
   1766 }
   1767 
   1768 int64 StringToInt64(const string16& value) {
   1769   int64 result;
   1770   StringToInt64(value, &result);
   1771   return result;
   1772 }
   1773 
   1774 int HexStringToInt(const std::string& value) {
   1775   int result;
   1776   HexStringToInt(value, &result);
   1777   return result;
   1778 }
   1779 
   1780 int HexStringToInt(const string16& value) {
   1781   int result;
   1782   HexStringToInt(value, &result);
   1783   return result;
   1784 }
   1785 
   1786 bool StringToDouble(const std::string& input, double* output) {
   1787   return StringToNumber<StringToDoubleTraits>(input, output);
   1788 }
   1789 
   1790 bool StringToDouble(const string16& input, double* output) {
   1791   return StringToNumber<String16ToDoubleTraits>(input, output);
   1792 }
   1793 
   1794 double StringToDouble(const std::string& value) {
   1795   double result;
   1796   StringToDouble(value, &result);
   1797   return result;
   1798 }
   1799 
   1800 double StringToDouble(const string16& value) {
   1801   double result;
   1802   StringToDouble(value, &result);
   1803   return result;
   1804 }
   1805 
   1806 // The following code is compatible with the OpenBSD lcpy interface.  See:
   1807 //   http://www.gratisoft.us/todd/papers/strlcpy.html
   1808 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
   1809 
   1810 namespace {
   1811 
   1812 template <typename CHAR>
   1813 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
   1814   for (size_t i = 0; i < dst_size; ++i) {
   1815     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
   1816       return i;
   1817   }
   1818 
   1819   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
   1820   if (dst_size != 0)
   1821     dst[dst_size - 1] = 0;
   1822 
   1823   // Count the rest of the |src|, and return it's length in characters.
   1824   while (src[dst_size]) ++dst_size;
   1825   return dst_size;
   1826 }
   1827 
   1828 }  // namespace
   1829 
   1830 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
   1831   return lcpyT<char>(dst, src, dst_size);
   1832 }
   1833 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
   1834   return lcpyT<wchar_t>(dst, src, dst_size);
   1835 }
   1836 
   1837 bool ElideString(const std::wstring& input, int max_len, std::wstring* output) {
   1838   DCHECK(max_len >= 0);
   1839   if (static_cast<int>(input.length()) <= max_len) {
   1840     output->assign(input);
   1841     return false;
   1842   }
   1843 
   1844   switch (max_len) {
   1845     case 0:
   1846       output->clear();
   1847       break;
   1848     case 1:
   1849       output->assign(input.substr(0, 1));
   1850       break;
   1851     case 2:
   1852       output->assign(input.substr(0, 2));
   1853       break;
   1854     case 3:
   1855       output->assign(input.substr(0, 1) + L"." +
   1856                      input.substr(input.length() - 1));
   1857       break;
   1858     case 4:
   1859       output->assign(input.substr(0, 1) + L".." +
   1860                      input.substr(input.length() - 1));
   1861       break;
   1862     default: {
   1863       int rstr_len = (max_len - 3) / 2;
   1864       int lstr_len = rstr_len + ((max_len - 3) % 2);
   1865       output->assign(input.substr(0, lstr_len) + L"..." +
   1866                      input.substr(input.length() - rstr_len));
   1867       break;
   1868     }
   1869   }
   1870 
   1871   return true;
   1872 }
   1873 
   1874 std::string HexEncode(const void* bytes, size_t size) {
   1875   static const char kHexChars[] = "0123456789ABCDEF";
   1876 
   1877   // Each input byte creates two output hex characters.
   1878   std::string ret(size * 2, '\0');
   1879 
   1880   for (size_t i = 0; i < size; ++i) {
   1881     char b = reinterpret_cast<const char*>(bytes)[i];
   1882     ret[(i * 2)] = kHexChars[(b >> 4) & 0xf];
   1883     ret[(i * 2) + 1] = kHexChars[b & 0xf];
   1884   }
   1885   return ret;
   1886 }
   1887