Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdio.h>
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <time.h>
     15 #include <wchar.h>
     16 #include <wctype.h>
     17 
     18 #include <algorithm>
     19 #include <vector>
     20 
     21 #include "base/basictypes.h"
     22 #include "base/logging.h"
     23 #include "base/memory/singleton.h"
     24 #include "base/strings/utf_string_conversion_utils.h"
     25 #include "base/strings/utf_string_conversions.h"
     26 #include "base/third_party/icu/icu_utf.h"
     27 #include "build/build_config.h"
     28 
     29 namespace {
     30 
     31 // Force the singleton used by Empty[W]String[16] to be a unique type. This
     32 // prevents other code that might accidentally use Singleton<string> from
     33 // getting our internal one.
     34 struct EmptyStrings {
     35   EmptyStrings() {}
     36   const std::string s;
     37   const std::wstring ws;
     38   const string16 s16;
     39 
     40   static EmptyStrings* GetInstance() {
     41     return Singleton<EmptyStrings>::get();
     42   }
     43 };
     44 
     45 // Used by ReplaceStringPlaceholders to track the position in the string of
     46 // replaced parameters.
     47 struct ReplacementOffset {
     48   ReplacementOffset(uintptr_t parameter, size_t offset)
     49       : parameter(parameter),
     50         offset(offset) {}
     51 
     52   // Index of the parameter.
     53   uintptr_t parameter;
     54 
     55   // Starting position in the string.
     56   size_t offset;
     57 };
     58 
     59 static bool CompareParameter(const ReplacementOffset& elem1,
     60                              const ReplacementOffset& elem2) {
     61   return elem1.parameter < elem2.parameter;
     62 }
     63 
     64 }  // namespace
     65 
     66 namespace base {
     67 
     68 bool IsWprintfFormatPortable(const wchar_t* format) {
     69   for (const wchar_t* position = format; *position != '\0'; ++position) {
     70     if (*position == '%') {
     71       bool in_specification = true;
     72       bool modifier_l = false;
     73       while (in_specification) {
     74         // Eat up characters until reaching a known specifier.
     75         if (*++position == '\0') {
     76           // The format string ended in the middle of a specification.  Call
     77           // it portable because no unportable specifications were found.  The
     78           // string is equally broken on all platforms.
     79           return true;
     80         }
     81 
     82         if (*position == 'l') {
     83           // 'l' is the only thing that can save the 's' and 'c' specifiers.
     84           modifier_l = true;
     85         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
     86                    *position == 'S' || *position == 'C' || *position == 'F' ||
     87                    *position == 'D' || *position == 'O' || *position == 'U') {
     88           // Not portable.
     89           return false;
     90         }
     91 
     92         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
     93           // Portable, keep scanning the rest of the format string.
     94           in_specification = false;
     95         }
     96       }
     97     }
     98   }
     99 
    100   return true;
    101 }
    102 
    103 }  // namespace base
    104 
    105 
    106 const std::string& EmptyString() {
    107   return EmptyStrings::GetInstance()->s;
    108 }
    109 
    110 const std::wstring& EmptyWString() {
    111   return EmptyStrings::GetInstance()->ws;
    112 }
    113 
    114 const string16& EmptyString16() {
    115   return EmptyStrings::GetInstance()->s16;
    116 }
    117 
    118 template<typename STR>
    119 bool ReplaceCharsT(const STR& input,
    120                    const typename STR::value_type replace_chars[],
    121                    const STR& replace_with,
    122                    STR* output) {
    123   bool removed = false;
    124   size_t replace_length = replace_with.length();
    125 
    126   *output = input;
    127 
    128   size_t found = output->find_first_of(replace_chars);
    129   while (found != STR::npos) {
    130     removed = true;
    131     output->replace(found, 1, replace_with);
    132     found = output->find_first_of(replace_chars, found + replace_length);
    133   }
    134 
    135   return removed;
    136 }
    137 
    138 bool ReplaceChars(const string16& input,
    139                   const char16 replace_chars[],
    140                   const string16& replace_with,
    141                   string16* output) {
    142   return ReplaceCharsT(input, replace_chars, replace_with, output);
    143 }
    144 
    145 bool ReplaceChars(const std::string& input,
    146                   const char replace_chars[],
    147                   const std::string& replace_with,
    148                   std::string* output) {
    149   return ReplaceCharsT(input, replace_chars, replace_with, output);
    150 }
    151 
    152 bool RemoveChars(const string16& input,
    153                  const char16 remove_chars[],
    154                  string16* output) {
    155   return ReplaceChars(input, remove_chars, string16(), output);
    156 }
    157 
    158 bool RemoveChars(const std::string& input,
    159                  const char remove_chars[],
    160                  std::string* output) {
    161   return ReplaceChars(input, remove_chars, std::string(), output);
    162 }
    163 
    164 template<typename STR>
    165 TrimPositions TrimStringT(const STR& input,
    166                           const typename STR::value_type trim_chars[],
    167                           TrimPositions positions,
    168                           STR* output) {
    169   // Find the edges of leading/trailing whitespace as desired.
    170   const typename STR::size_type last_char = input.length() - 1;
    171   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
    172       input.find_first_not_of(trim_chars) : 0;
    173   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
    174       input.find_last_not_of(trim_chars) : last_char;
    175 
    176   // When the string was all whitespace, report that we stripped off whitespace
    177   // from whichever position the caller was interested in.  For empty input, we
    178   // stripped no whitespace, but we still need to clear |output|.
    179   if (input.empty() ||
    180       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
    181     bool input_was_empty = input.empty();  // in case output == &input
    182     output->clear();
    183     return input_was_empty ? TRIM_NONE : positions;
    184   }
    185 
    186   // Trim the whitespace.
    187   *output =
    188       input.substr(first_good_char, last_good_char - first_good_char + 1);
    189 
    190   // Return where we trimmed from.
    191   return static_cast<TrimPositions>(
    192       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    193       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    194 }
    195 
    196 bool TrimString(const std::wstring& input,
    197                 const wchar_t trim_chars[],
    198                 std::wstring* output) {
    199   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    200 }
    201 
    202 #if !defined(WCHAR_T_IS_UTF16)
    203 bool TrimString(const string16& input,
    204                 const char16 trim_chars[],
    205                 string16* output) {
    206   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    207 }
    208 #endif
    209 
    210 bool TrimString(const std::string& input,
    211                 const char trim_chars[],
    212                 std::string* output) {
    213   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    214 }
    215 
    216 void TruncateUTF8ToByteSize(const std::string& input,
    217                             const size_t byte_size,
    218                             std::string* output) {
    219   DCHECK(output);
    220   if (byte_size > input.length()) {
    221     *output = input;
    222     return;
    223   }
    224   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
    225   // Note: This cast is necessary because CBU8_NEXT uses int32s.
    226   int32 truncation_length = static_cast<int32>(byte_size);
    227   int32 char_index = truncation_length - 1;
    228   const char* data = input.data();
    229 
    230   // Using CBU8, we will move backwards from the truncation point
    231   // to the beginning of the string looking for a valid UTF8
    232   // character.  Once a full UTF8 character is found, we will
    233   // truncate the string to the end of that character.
    234   while (char_index >= 0) {
    235     int32 prev = char_index;
    236     uint32 code_point = 0;
    237     CBU8_NEXT(data, char_index, truncation_length, code_point);
    238     if (!base::IsValidCharacter(code_point) ||
    239         !base::IsValidCodepoint(code_point)) {
    240       char_index = prev - 1;
    241     } else {
    242       break;
    243     }
    244   }
    245 
    246   if (char_index >= 0 )
    247     *output = input.substr(0, char_index);
    248   else
    249     output->clear();
    250 }
    251 
    252 TrimPositions TrimWhitespace(const string16& input,
    253                              TrimPositions positions,
    254                              string16* output) {
    255   return TrimStringT(input, kWhitespaceUTF16, positions, output);
    256 }
    257 
    258 TrimPositions TrimWhitespaceASCII(const std::string& input,
    259                                   TrimPositions positions,
    260                                   std::string* output) {
    261   return TrimStringT(input, kWhitespaceASCII, positions, output);
    262 }
    263 
    264 // This function is only for backward-compatibility.
    265 // To be removed when all callers are updated.
    266 TrimPositions TrimWhitespace(const std::string& input,
    267                              TrimPositions positions,
    268                              std::string* output) {
    269   return TrimWhitespaceASCII(input, positions, output);
    270 }
    271 
    272 template<typename STR>
    273 STR CollapseWhitespaceT(const STR& text,
    274                         bool trim_sequences_with_line_breaks) {
    275   STR result;
    276   result.resize(text.size());
    277 
    278   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    279   // will trim any leading whitespace.
    280   bool in_whitespace = true;
    281   bool already_trimmed = true;
    282 
    283   int chars_written = 0;
    284   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    285     if (IsWhitespace(*i)) {
    286       if (!in_whitespace) {
    287         // Reduce all whitespace sequences to a single space.
    288         in_whitespace = true;
    289         result[chars_written++] = L' ';
    290       }
    291       if (trim_sequences_with_line_breaks && !already_trimmed &&
    292           ((*i == '\n') || (*i == '\r'))) {
    293         // Whitespace sequences containing CR or LF are eliminated entirely.
    294         already_trimmed = true;
    295         --chars_written;
    296       }
    297     } else {
    298       // Non-whitespace chracters are copied straight across.
    299       in_whitespace = false;
    300       already_trimmed = false;
    301       result[chars_written++] = *i;
    302     }
    303   }
    304 
    305   if (in_whitespace && !already_trimmed) {
    306     // Any trailing whitespace is eliminated.
    307     --chars_written;
    308   }
    309 
    310   result.resize(chars_written);
    311   return result;
    312 }
    313 
    314 std::wstring CollapseWhitespace(const std::wstring& text,
    315                                 bool trim_sequences_with_line_breaks) {
    316   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    317 }
    318 
    319 #if !defined(WCHAR_T_IS_UTF16)
    320 string16 CollapseWhitespace(const string16& text,
    321                             bool trim_sequences_with_line_breaks) {
    322   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    323 }
    324 #endif
    325 
    326 std::string CollapseWhitespaceASCII(const std::string& text,
    327                                     bool trim_sequences_with_line_breaks) {
    328   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    329 }
    330 
    331 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
    332   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
    333     if (!IsAsciiWhitespace(*i))
    334       return false;
    335   }
    336   return true;
    337 }
    338 
    339 bool ContainsOnlyWhitespace(const string16& str) {
    340   return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
    341 }
    342 
    343 template<typename STR>
    344 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
    345   for (typename STR::const_iterator iter = input.begin();
    346        iter != input.end(); ++iter) {
    347     if (characters.find(*iter) == STR::npos)
    348       return false;
    349   }
    350   return true;
    351 }
    352 
    353 bool ContainsOnlyChars(const std::wstring& input,
    354                        const std::wstring& characters) {
    355   return ContainsOnlyCharsT(input, characters);
    356 }
    357 
    358 #if !defined(WCHAR_T_IS_UTF16)
    359 bool ContainsOnlyChars(const string16& input, const string16& characters) {
    360   return ContainsOnlyCharsT(input, characters);
    361 }
    362 #endif
    363 
    364 bool ContainsOnlyChars(const std::string& input,
    365                        const std::string& characters) {
    366   return ContainsOnlyCharsT(input, characters);
    367 }
    368 
    369 std::string WideToASCII(const std::wstring& wide) {
    370   DCHECK(IsStringASCII(wide)) << wide;
    371   return std::string(wide.begin(), wide.end());
    372 }
    373 
    374 std::string UTF16ToASCII(const string16& utf16) {
    375   DCHECK(IsStringASCII(utf16)) << utf16;
    376   return std::string(utf16.begin(), utf16.end());
    377 }
    378 
    379 // Latin1 is just the low range of Unicode, so we can copy directly to convert.
    380 bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
    381   std::string output;
    382   output.resize(wide.size());
    383   latin1->clear();
    384   for (size_t i = 0; i < wide.size(); i++) {
    385     if (wide[i] > 255)
    386       return false;
    387     output[i] = static_cast<char>(wide[i]);
    388   }
    389   latin1->swap(output);
    390   return true;
    391 }
    392 
    393 template<class STR>
    394 static bool DoIsStringASCII(const STR& str) {
    395   for (size_t i = 0; i < str.length(); i++) {
    396     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
    397     if (c > 0x7F)
    398       return false;
    399   }
    400   return true;
    401 }
    402 
    403 bool IsStringASCII(const std::wstring& str) {
    404   return DoIsStringASCII(str);
    405 }
    406 
    407 #if !defined(WCHAR_T_IS_UTF16)
    408 bool IsStringASCII(const string16& str) {
    409   return DoIsStringASCII(str);
    410 }
    411 #endif
    412 
    413 bool IsStringASCII(const base::StringPiece& str) {
    414   return DoIsStringASCII(str);
    415 }
    416 
    417 bool IsStringUTF8(const std::string& str) {
    418   const char *src = str.data();
    419   int32 src_len = static_cast<int32>(str.length());
    420   int32 char_index = 0;
    421 
    422   while (char_index < src_len) {
    423     int32 code_point;
    424     CBU8_NEXT(src, char_index, src_len, code_point);
    425     if (!base::IsValidCharacter(code_point))
    426       return false;
    427   }
    428   return true;
    429 }
    430 
    431 template<typename Iter>
    432 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
    433                                           Iter a_end,
    434                                           const char* b) {
    435   for (Iter it = a_begin; it != a_end; ++it, ++b) {
    436     if (!*b || base::ToLowerASCII(*it) != *b)
    437       return false;
    438   }
    439   return *b == 0;
    440 }
    441 
    442 // Front-ends for LowerCaseEqualsASCII.
    443 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
    444   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    445 }
    446 
    447 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
    448   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    449 }
    450 
    451 #if !defined(WCHAR_T_IS_UTF16)
    452 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
    453   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    454 }
    455 #endif
    456 
    457 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
    458                           std::string::const_iterator a_end,
    459                           const char* b) {
    460   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    461 }
    462 
    463 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
    464                           std::wstring::const_iterator a_end,
    465                           const char* b) {
    466   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    467 }
    468 
    469 #if !defined(WCHAR_T_IS_UTF16)
    470 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
    471                           string16::const_iterator a_end,
    472                           const char* b) {
    473   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    474 }
    475 #endif
    476 
    477 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
    478 #if !defined(OS_ANDROID)
    479 bool LowerCaseEqualsASCII(const char* a_begin,
    480                           const char* a_end,
    481                           const char* b) {
    482   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    483 }
    484 
    485 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
    486                           const wchar_t* a_end,
    487                           const char* b) {
    488   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    489 }
    490 
    491 #if !defined(WCHAR_T_IS_UTF16)
    492 bool LowerCaseEqualsASCII(const char16* a_begin,
    493                           const char16* a_end,
    494                           const char* b) {
    495   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    496 }
    497 #endif
    498 
    499 #endif  // !defined(OS_ANDROID)
    500 
    501 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
    502   if (a.length() != b.length())
    503     return false;
    504   return std::equal(b.begin(), b.end(), a.begin());
    505 }
    506 
    507 bool StartsWithASCII(const std::string& str,
    508                      const std::string& search,
    509                      bool case_sensitive) {
    510   if (case_sensitive)
    511     return str.compare(0, search.length(), search) == 0;
    512   else
    513     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
    514 }
    515 
    516 template <typename STR>
    517 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
    518   if (case_sensitive) {
    519     return str.compare(0, search.length(), search) == 0;
    520   } else {
    521     if (search.size() > str.size())
    522       return false;
    523     return std::equal(search.begin(), search.end(), str.begin(),
    524                       base::CaseInsensitiveCompare<typename STR::value_type>());
    525   }
    526 }
    527 
    528 bool StartsWith(const std::wstring& str, const std::wstring& search,
    529                 bool case_sensitive) {
    530   return StartsWithT(str, search, case_sensitive);
    531 }
    532 
    533 #if !defined(WCHAR_T_IS_UTF16)
    534 bool StartsWith(const string16& str, const string16& search,
    535                 bool case_sensitive) {
    536   return StartsWithT(str, search, case_sensitive);
    537 }
    538 #endif
    539 
    540 template <typename STR>
    541 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
    542   typename STR::size_type str_length = str.length();
    543   typename STR::size_type search_length = search.length();
    544   if (search_length > str_length)
    545     return false;
    546   if (case_sensitive) {
    547     return str.compare(str_length - search_length, search_length, search) == 0;
    548   } else {
    549     return std::equal(search.begin(), search.end(),
    550                       str.begin() + (str_length - search_length),
    551                       base::CaseInsensitiveCompare<typename STR::value_type>());
    552   }
    553 }
    554 
    555 bool EndsWith(const std::string& str, const std::string& search,
    556               bool case_sensitive) {
    557   return EndsWithT(str, search, case_sensitive);
    558 }
    559 
    560 bool EndsWith(const std::wstring& str, const std::wstring& search,
    561               bool case_sensitive) {
    562   return EndsWithT(str, search, case_sensitive);
    563 }
    564 
    565 #if !defined(WCHAR_T_IS_UTF16)
    566 bool EndsWith(const string16& str, const string16& search,
    567               bool case_sensitive) {
    568   return EndsWithT(str, search, case_sensitive);
    569 }
    570 #endif
    571 
    572 static const char* const kByteStringsUnlocalized[] = {
    573   " B",
    574   " kB",
    575   " MB",
    576   " GB",
    577   " TB",
    578   " PB"
    579 };
    580 
    581 string16 FormatBytesUnlocalized(int64 bytes) {
    582   double unit_amount = static_cast<double>(bytes);
    583   size_t dimension = 0;
    584   const int kKilo = 1024;
    585   while (unit_amount >= kKilo &&
    586          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    587     unit_amount /= kKilo;
    588     dimension++;
    589   }
    590 
    591   char buf[64];
    592   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    593     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    594                    kByteStringsUnlocalized[dimension]);
    595   } else {
    596     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    597                    kByteStringsUnlocalized[dimension]);
    598   }
    599 
    600   return ASCIIToUTF16(buf);
    601 }
    602 
    603 template<class StringType>
    604 void DoReplaceSubstringsAfterOffset(StringType* str,
    605                                     typename StringType::size_type start_offset,
    606                                     const StringType& find_this,
    607                                     const StringType& replace_with,
    608                                     bool replace_all) {
    609   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    610     return;
    611 
    612   DCHECK(!find_this.empty());
    613   for (typename StringType::size_type offs(str->find(find_this, start_offset));
    614       offs != StringType::npos; offs = str->find(find_this, offs)) {
    615     str->replace(offs, find_this.length(), replace_with);
    616     offs += replace_with.length();
    617 
    618     if (!replace_all)
    619       break;
    620   }
    621 }
    622 
    623 void ReplaceFirstSubstringAfterOffset(string16* str,
    624                                       string16::size_type start_offset,
    625                                       const string16& find_this,
    626                                       const string16& replace_with) {
    627   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    628                                  false);  // replace first instance
    629 }
    630 
    631 void ReplaceFirstSubstringAfterOffset(std::string* str,
    632                                       std::string::size_type start_offset,
    633                                       const std::string& find_this,
    634                                       const std::string& replace_with) {
    635   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    636                                  false);  // replace first instance
    637 }
    638 
    639 void ReplaceSubstringsAfterOffset(string16* str,
    640                                   string16::size_type start_offset,
    641                                   const string16& find_this,
    642                                   const string16& replace_with) {
    643   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    644                                  true);  // replace all instances
    645 }
    646 
    647 void ReplaceSubstringsAfterOffset(std::string* str,
    648                                   std::string::size_type start_offset,
    649                                   const std::string& find_this,
    650                                   const std::string& replace_with) {
    651   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    652                                  true);  // replace all instances
    653 }
    654 
    655 
    656 template<typename STR>
    657 static size_t TokenizeT(const STR& str,
    658                         const STR& delimiters,
    659                         std::vector<STR>* tokens) {
    660   tokens->clear();
    661 
    662   typename STR::size_type start = str.find_first_not_of(delimiters);
    663   while (start != STR::npos) {
    664     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
    665     if (end == STR::npos) {
    666       tokens->push_back(str.substr(start));
    667       break;
    668     } else {
    669       tokens->push_back(str.substr(start, end - start));
    670       start = str.find_first_not_of(delimiters, end + 1);
    671     }
    672   }
    673 
    674   return tokens->size();
    675 }
    676 
    677 size_t Tokenize(const std::wstring& str,
    678                 const std::wstring& delimiters,
    679                 std::vector<std::wstring>* tokens) {
    680   return TokenizeT(str, delimiters, tokens);
    681 }
    682 
    683 #if !defined(WCHAR_T_IS_UTF16)
    684 size_t Tokenize(const string16& str,
    685                 const string16& delimiters,
    686                 std::vector<string16>* tokens) {
    687   return TokenizeT(str, delimiters, tokens);
    688 }
    689 #endif
    690 
    691 size_t Tokenize(const std::string& str,
    692                 const std::string& delimiters,
    693                 std::vector<std::string>* tokens) {
    694   return TokenizeT(str, delimiters, tokens);
    695 }
    696 
    697 size_t Tokenize(const base::StringPiece& str,
    698                 const base::StringPiece& delimiters,
    699                 std::vector<base::StringPiece>* tokens) {
    700   return TokenizeT(str, delimiters, tokens);
    701 }
    702 
    703 template<typename STR>
    704 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
    705   if (parts.empty())
    706     return STR();
    707 
    708   STR result(parts[0]);
    709   typename std::vector<STR>::const_iterator iter = parts.begin();
    710   ++iter;
    711 
    712   for (; iter != parts.end(); ++iter) {
    713     result += sep;
    714     result += *iter;
    715   }
    716 
    717   return result;
    718 }
    719 
    720 std::string JoinString(const std::vector<std::string>& parts, char sep) {
    721   return JoinStringT(parts, std::string(1, sep));
    722 }
    723 
    724 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
    725   return JoinStringT(parts, string16(1, sep));
    726 }
    727 
    728 std::string JoinString(const std::vector<std::string>& parts,
    729                        const std::string& separator) {
    730   return JoinStringT(parts, separator);
    731 }
    732 
    733 string16 JoinString(const std::vector<string16>& parts,
    734                     const string16& separator) {
    735   return JoinStringT(parts, separator);
    736 }
    737 
    738 template<class FormatStringType, class OutStringType>
    739 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
    740     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
    741   size_t substitutions = subst.size();
    742 
    743   size_t sub_length = 0;
    744   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
    745        iter != subst.end(); ++iter) {
    746     sub_length += iter->length();
    747   }
    748 
    749   OutStringType formatted;
    750   formatted.reserve(format_string.length() + sub_length);
    751 
    752   std::vector<ReplacementOffset> r_offsets;
    753   for (typename FormatStringType::const_iterator i = format_string.begin();
    754        i != format_string.end(); ++i) {
    755     if ('$' == *i) {
    756       if (i + 1 != format_string.end()) {
    757         ++i;
    758         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
    759         if ('$' == *i) {
    760           while (i != format_string.end() && '$' == *i) {
    761             formatted.push_back('$');
    762             ++i;
    763           }
    764           --i;
    765         } else {
    766           uintptr_t index = 0;
    767           while (i != format_string.end() && '0' <= *i && *i <= '9') {
    768             index *= 10;
    769             index += *i - '0';
    770             ++i;
    771           }
    772           --i;
    773           index -= 1;
    774           if (offsets) {
    775             ReplacementOffset r_offset(index,
    776                 static_cast<int>(formatted.size()));
    777             r_offsets.insert(std::lower_bound(r_offsets.begin(),
    778                                               r_offsets.end(),
    779                                               r_offset,
    780                                               &CompareParameter),
    781                              r_offset);
    782           }
    783           if (index < substitutions)
    784             formatted.append(subst.at(index));
    785         }
    786       }
    787     } else {
    788       formatted.push_back(*i);
    789     }
    790   }
    791   if (offsets) {
    792     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
    793          i != r_offsets.end(); ++i) {
    794       offsets->push_back(i->offset);
    795     }
    796   }
    797   return formatted;
    798 }
    799 
    800 string16 ReplaceStringPlaceholders(const string16& format_string,
    801                                    const std::vector<string16>& subst,
    802                                    std::vector<size_t>* offsets) {
    803   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    804 }
    805 
    806 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
    807                                       const std::vector<std::string>& subst,
    808                                       std::vector<size_t>* offsets) {
    809   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    810 }
    811 
    812 string16 ReplaceStringPlaceholders(const string16& format_string,
    813                                    const string16& a,
    814                                    size_t* offset) {
    815   std::vector<size_t> offsets;
    816   std::vector<string16> subst;
    817   subst.push_back(a);
    818   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
    819 
    820   DCHECK(offsets.size() == 1);
    821   if (offset) {
    822     *offset = offsets[0];
    823   }
    824   return result;
    825 }
    826 
    827 static bool IsWildcard(base_icu::UChar32 character) {
    828   return character == '*' || character == '?';
    829 }
    830 
    831 // Move the strings pointers to the point where they start to differ.
    832 template <typename CHAR, typename NEXT>
    833 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
    834                          const CHAR** string, const CHAR* string_end,
    835                          NEXT next) {
    836   const CHAR* escape = NULL;
    837   while (*pattern != pattern_end && *string != string_end) {
    838     if (!escape && IsWildcard(**pattern)) {
    839       // We don't want to match wildcard here, except if it's escaped.
    840       return;
    841     }
    842 
    843     // Check if the escapement char is found. If so, skip it and move to the
    844     // next character.
    845     if (!escape && **pattern == '\\') {
    846       escape = *pattern;
    847       next(pattern, pattern_end);
    848       continue;
    849     }
    850 
    851     // Check if the chars match, if so, increment the ptrs.
    852     const CHAR* pattern_next = *pattern;
    853     const CHAR* string_next = *string;
    854     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    855     if (pattern_char == next(&string_next, string_end) &&
    856         pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
    857       *pattern = pattern_next;
    858       *string = string_next;
    859     } else {
    860       // Uh ho, it did not match, we are done. If the last char was an
    861       // escapement, that means that it was an error to advance the ptr here,
    862       // let's put it back where it was. This also mean that the MatchPattern
    863       // function will return false because if we can't match an escape char
    864       // here, then no one will.
    865       if (escape) {
    866         *pattern = escape;
    867       }
    868       return;
    869     }
    870 
    871     escape = NULL;
    872   }
    873 }
    874 
    875 template <typename CHAR, typename NEXT>
    876 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
    877   while (*pattern != end) {
    878     if (!IsWildcard(**pattern))
    879       return;
    880     next(pattern, end);
    881   }
    882 }
    883 
    884 template <typename CHAR, typename NEXT>
    885 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
    886                           const CHAR* pattern, const CHAR* pattern_end,
    887                           int depth,
    888                           NEXT next) {
    889   const int kMaxDepth = 16;
    890   if (depth > kMaxDepth)
    891     return false;
    892 
    893   // Eat all the matching chars.
    894   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
    895 
    896   // If the string is empty, then the pattern must be empty too, or contains
    897   // only wildcards.
    898   if (eval == eval_end) {
    899     EatWildcard(&pattern, pattern_end, next);
    900     return pattern == pattern_end;
    901   }
    902 
    903   // Pattern is empty but not string, this is not a match.
    904   if (pattern == pattern_end)
    905     return false;
    906 
    907   // If this is a question mark, then we need to compare the rest with
    908   // the current string or the string with one character eaten.
    909   const CHAR* next_pattern = pattern;
    910   next(&next_pattern, pattern_end);
    911   if (pattern[0] == '?') {
    912     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
    913                       depth + 1, next))
    914       return true;
    915     const CHAR* next_eval = eval;
    916     next(&next_eval, eval_end);
    917     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
    918                       depth + 1, next))
    919       return true;
    920   }
    921 
    922   // This is a *, try to match all the possible substrings with the remainder
    923   // of the pattern.
    924   if (pattern[0] == '*') {
    925     // Collapse duplicate wild cards (********** into *) so that the
    926     // method does not recurse unnecessarily. http://crbug.com/52839
    927     EatWildcard(&next_pattern, pattern_end, next);
    928 
    929     while (eval != eval_end) {
    930       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
    931                         depth + 1, next))
    932         return true;
    933       eval++;
    934     }
    935 
    936     // We reached the end of the string, let see if the pattern contains only
    937     // wildcards.
    938     if (eval == eval_end) {
    939       EatWildcard(&pattern, pattern_end, next);
    940       if (pattern != pattern_end)
    941         return false;
    942       return true;
    943     }
    944   }
    945 
    946   return false;
    947 }
    948 
    949 struct NextCharUTF8 {
    950   base_icu::UChar32 operator()(const char** p, const char* end) {
    951     base_icu::UChar32 c;
    952     int offset = 0;
    953     CBU8_NEXT(*p, offset, end - *p, c);
    954     *p += offset;
    955     return c;
    956   }
    957 };
    958 
    959 struct NextCharUTF16 {
    960   base_icu::UChar32 operator()(const char16** p, const char16* end) {
    961     base_icu::UChar32 c;
    962     int offset = 0;
    963     CBU16_NEXT(*p, offset, end - *p, c);
    964     *p += offset;
    965     return c;
    966   }
    967 };
    968 
    969 bool MatchPattern(const base::StringPiece& eval,
    970                   const base::StringPiece& pattern) {
    971   return MatchPatternT(eval.data(), eval.data() + eval.size(),
    972                        pattern.data(), pattern.data() + pattern.size(),
    973                        0, NextCharUTF8());
    974 }
    975 
    976 bool MatchPattern(const string16& eval, const string16& pattern) {
    977   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
    978                        pattern.c_str(), pattern.c_str() + pattern.size(),
    979                        0, NextCharUTF16());
    980 }
    981 
    982 // The following code is compatible with the OpenBSD lcpy interface.  See:
    983 //   http://www.gratisoft.us/todd/papers/strlcpy.html
    984 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
    985 
    986 namespace {
    987 
    988 template <typename CHAR>
    989 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
    990   for (size_t i = 0; i < dst_size; ++i) {
    991     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
    992       return i;
    993   }
    994 
    995   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
    996   if (dst_size != 0)
    997     dst[dst_size - 1] = 0;
    998 
    999   // Count the rest of the |src|, and return it's length in characters.
   1000   while (src[dst_size]) ++dst_size;
   1001   return dst_size;
   1002 }
   1003 
   1004 }  // namespace
   1005 
   1006 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
   1007   return lcpyT<char>(dst, src, dst_size);
   1008 }
   1009 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
   1010   return lcpyT<wchar_t>(dst, src, dst_size);
   1011 }
   1012