Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdio.h>
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <time.h>
     15 #include <wchar.h>
     16 #include <wctype.h>
     17 
     18 #include <algorithm>
     19 #include <vector>
     20 
     21 #include "base/basictypes.h"
     22 #include "base/logging.h"
     23 #include "base/memory/singleton.h"
     24 #include "base/strings/utf_string_conversion_utils.h"
     25 #include "base/strings/utf_string_conversions.h"
     26 #include "base/third_party/icu/icu_utf.h"
     27 #include "build/build_config.h"
     28 
     29 // Remove when this entire file is in the base namespace.
     30 using base::char16;
     31 using base::string16;
     32 
     33 namespace {
     34 
     35 // Force the singleton used by EmptyString[16] to be a unique type. This
     36 // prevents other code that might accidentally use Singleton<string> from
     37 // getting our internal one.
     38 struct EmptyStrings {
     39   EmptyStrings() {}
     40   const std::string s;
     41   const string16 s16;
     42 
     43   static EmptyStrings* GetInstance() {
     44     return Singleton<EmptyStrings>::get();
     45   }
     46 };
     47 
     48 // Used by ReplaceStringPlaceholders to track the position in the string of
     49 // replaced parameters.
     50 struct ReplacementOffset {
     51   ReplacementOffset(uintptr_t parameter, size_t offset)
     52       : parameter(parameter),
     53         offset(offset) {}
     54 
     55   // Index of the parameter.
     56   uintptr_t parameter;
     57 
     58   // Starting position in the string.
     59   size_t offset;
     60 };
     61 
     62 static bool CompareParameter(const ReplacementOffset& elem1,
     63                              const ReplacementOffset& elem2) {
     64   return elem1.parameter < elem2.parameter;
     65 }
     66 
     67 }  // namespace
     68 
     69 namespace base {
     70 
     71 bool IsWprintfFormatPortable(const wchar_t* format) {
     72   for (const wchar_t* position = format; *position != '\0'; ++position) {
     73     if (*position == '%') {
     74       bool in_specification = true;
     75       bool modifier_l = false;
     76       while (in_specification) {
     77         // Eat up characters until reaching a known specifier.
     78         if (*++position == '\0') {
     79           // The format string ended in the middle of a specification.  Call
     80           // it portable because no unportable specifications were found.  The
     81           // string is equally broken on all platforms.
     82           return true;
     83         }
     84 
     85         if (*position == 'l') {
     86           // 'l' is the only thing that can save the 's' and 'c' specifiers.
     87           modifier_l = true;
     88         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
     89                    *position == 'S' || *position == 'C' || *position == 'F' ||
     90                    *position == 'D' || *position == 'O' || *position == 'U') {
     91           // Not portable.
     92           return false;
     93         }
     94 
     95         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
     96           // Portable, keep scanning the rest of the format string.
     97           in_specification = false;
     98         }
     99       }
    100     }
    101   }
    102 
    103   return true;
    104 }
    105 
    106 const std::string& EmptyString() {
    107   return EmptyStrings::GetInstance()->s;
    108 }
    109 
    110 const string16& EmptyString16() {
    111   return EmptyStrings::GetInstance()->s16;
    112 }
    113 
    114 template<typename STR>
    115 bool ReplaceCharsT(const STR& input,
    116                    const STR& replace_chars,
    117                    const STR& replace_with,
    118                    STR* output) {
    119   bool removed = false;
    120   size_t replace_length = replace_with.length();
    121 
    122   *output = input;
    123 
    124   size_t found = output->find_first_of(replace_chars);
    125   while (found != STR::npos) {
    126     removed = true;
    127     output->replace(found, 1, replace_with);
    128     found = output->find_first_of(replace_chars, found + replace_length);
    129   }
    130 
    131   return removed;
    132 }
    133 
    134 bool ReplaceChars(const string16& input,
    135                   const base::StringPiece16& replace_chars,
    136                   const string16& replace_with,
    137                   string16* output) {
    138   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    139 }
    140 
    141 bool ReplaceChars(const std::string& input,
    142                   const base::StringPiece& replace_chars,
    143                   const std::string& replace_with,
    144                   std::string* output) {
    145   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    146 }
    147 
    148 bool RemoveChars(const string16& input,
    149                  const base::StringPiece16& remove_chars,
    150                  string16* output) {
    151   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
    152 }
    153 
    154 bool RemoveChars(const std::string& input,
    155                  const base::StringPiece& remove_chars,
    156                  std::string* output) {
    157   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
    158 }
    159 
    160 template<typename STR>
    161 TrimPositions TrimStringT(const STR& input,
    162                           const STR& trim_chars,
    163                           TrimPositions positions,
    164                           STR* output) {
    165   // Find the edges of leading/trailing whitespace as desired.
    166   const size_t last_char = input.length() - 1;
    167   const size_t first_good_char = (positions & TRIM_LEADING) ?
    168       input.find_first_not_of(trim_chars) : 0;
    169   const size_t last_good_char = (positions & TRIM_TRAILING) ?
    170       input.find_last_not_of(trim_chars) : last_char;
    171 
    172   // When the string was all whitespace, report that we stripped off whitespace
    173   // from whichever position the caller was interested in.  For empty input, we
    174   // stripped no whitespace, but we still need to clear |output|.
    175   if (input.empty() ||
    176       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
    177     bool input_was_empty = input.empty();  // in case output == &input
    178     output->clear();
    179     return input_was_empty ? TRIM_NONE : positions;
    180   }
    181 
    182   // Trim the whitespace.
    183   *output =
    184       input.substr(first_good_char, last_good_char - first_good_char + 1);
    185 
    186   // Return where we trimmed from.
    187   return static_cast<TrimPositions>(
    188       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    189       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    190 }
    191 
    192 bool TrimString(const string16& input,
    193                 const base::StringPiece16& trim_chars,
    194                 string16* output) {
    195   return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
    196       TRIM_NONE;
    197 }
    198 
    199 bool TrimString(const std::string& input,
    200                 const base::StringPiece& trim_chars,
    201                 std::string* output) {
    202   return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
    203       TRIM_NONE;
    204 }
    205 
// Writes to |*output| a prefix of |input| that is at most |byte_size| bytes
// long and ends on the boundary of a character that decodes successfully.
//
// If |byte_size| exceeds the input length, |input| is copied unchanged (no
// validation is performed in that case). Otherwise the function searches
// backwards from the truncation point for the last position at which a valid
// character can be decoded within the first |byte_size| bytes, and truncates
// right after that character. If no such character exists, |*output| is
// cleared.
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output) {
  DCHECK(output);
  if (byte_size > input.length()) {
    *output = input;
    return;
  }
  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
  // Note: This cast is necessary because CBU8_NEXT uses int32s.
  int32 truncation_length = static_cast<int32>(byte_size);
  // Start decoding attempts at the last byte inside the truncated range.
  int32 char_index = truncation_length - 1;
  const char* data = input.data();

  // Using CBU8, we will move backwards from the truncation point
  // to the beginning of the string looking for a valid UTF8
  // character.  Once a full UTF8 character is found, we will
  // truncate the string to the end of that character.
  while (char_index >= 0) {
    // Remember where this attempt started; CBU8_NEXT advances |char_index|.
    int32 prev = char_index;
    uint32 code_point = 0;
    // Decoding is bounded by |truncation_length|, so bytes past the
    // truncation point are never consumed.
    CBU8_NEXT(data, char_index, truncation_length, code_point);
    if (!IsValidCharacter(code_point) ||
        !IsValidCodepoint(code_point)) {
      // Not a valid character starting at |prev|; retry one byte earlier.
      char_index = prev - 1;
    } else {
      // |char_index| now points just past the decoded character.
      break;
    }
  }

  // |char_index| is the length of the prefix to keep, or negative if no valid
  // character was found.
  if (char_index >= 0 )
    *output = input.substr(0, char_index);
  else
    output->clear();
}
    241 
    242 TrimPositions TrimWhitespace(const string16& input,
    243                              TrimPositions positions,
    244                              string16* output) {
    245   return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
    246                      output);
    247 }
    248 
    249 TrimPositions TrimWhitespaceASCII(const std::string& input,
    250                                   TrimPositions positions,
    251                                   std::string* output) {
    252   return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
    253 }
    254 
    255 // This function is only for backward-compatibility.
    256 // To be removed when all callers are updated.
    257 TrimPositions TrimWhitespace(const std::string& input,
    258                              TrimPositions positions,
    259                              std::string* output) {
    260   return TrimWhitespaceASCII(input, positions, output);
    261 }
    262 
    263 template<typename STR>
    264 STR CollapseWhitespaceT(const STR& text,
    265                         bool trim_sequences_with_line_breaks) {
    266   STR result;
    267   result.resize(text.size());
    268 
    269   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    270   // will trim any leading whitespace.
    271   bool in_whitespace = true;
    272   bool already_trimmed = true;
    273 
    274   int chars_written = 0;
    275   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    276     if (IsWhitespace(*i)) {
    277       if (!in_whitespace) {
    278         // Reduce all whitespace sequences to a single space.
    279         in_whitespace = true;
    280         result[chars_written++] = L' ';
    281       }
    282       if (trim_sequences_with_line_breaks && !already_trimmed &&
    283           ((*i == '\n') || (*i == '\r'))) {
    284         // Whitespace sequences containing CR or LF are eliminated entirely.
    285         already_trimmed = true;
    286         --chars_written;
    287       }
    288     } else {
    289       // Non-whitespace chracters are copied straight across.
    290       in_whitespace = false;
    291       already_trimmed = false;
    292       result[chars_written++] = *i;
    293     }
    294   }
    295 
    296   if (in_whitespace && !already_trimmed) {
    297     // Any trailing whitespace is eliminated.
    298     --chars_written;
    299   }
    300 
    301   result.resize(chars_written);
    302   return result;
    303 }
    304 
    305 string16 CollapseWhitespace(const string16& text,
    306                             bool trim_sequences_with_line_breaks) {
    307   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    308 }
    309 
    310 std::string CollapseWhitespaceASCII(const std::string& text,
    311                                     bool trim_sequences_with_line_breaks) {
    312   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    313 }
    314 
    315 bool ContainsOnlyChars(const StringPiece& input,
    316                        const StringPiece& characters) {
    317   return input.find_first_not_of(characters) == StringPiece::npos;
    318 }
    319 
    320 bool ContainsOnlyChars(const StringPiece16& input,
    321                        const StringPiece16& characters) {
    322   return input.find_first_not_of(characters) == StringPiece16::npos;
    323 }
    324 
    325 template<class STR>
    326 static bool DoIsStringASCII(const STR& str) {
    327   for (size_t i = 0; i < str.length(); i++) {
    328     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
    329     if (c > 0x7F)
    330       return false;
    331   }
    332   return true;
    333 }
    334 
    335 bool IsStringASCII(const StringPiece& str) {
    336   return DoIsStringASCII(str);
    337 }
    338 
    339 bool IsStringASCII(const string16& str) {
    340   return DoIsStringASCII(str);
    341 }
    342 
    343 bool IsStringUTF8(const std::string& str) {
    344   const char *src = str.data();
    345   int32 src_len = static_cast<int32>(str.length());
    346   int32 char_index = 0;
    347 
    348   while (char_index < src_len) {
    349     int32 code_point;
    350     CBU8_NEXT(src, char_index, src_len, code_point);
    351     if (!IsValidCharacter(code_point))
    352       return false;
    353   }
    354   return true;
    355 }
    356 
    357 }  // namespace base
    358 
    359 template<typename Iter>
    360 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
    361                                           Iter a_end,
    362                                           const char* b) {
    363   for (Iter it = a_begin; it != a_end; ++it, ++b) {
    364     if (!*b || base::ToLowerASCII(*it) != *b)
    365       return false;
    366   }
    367   return *b == 0;
    368 }
    369 
    370 // Front-ends for LowerCaseEqualsASCII.
    371 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
    372   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    373 }
    374 
    375 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
    376   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
    377 }
    378 
    379 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
    380                           std::string::const_iterator a_end,
    381                           const char* b) {
    382   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    383 }
    384 
    385 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
    386                           string16::const_iterator a_end,
    387                           const char* b) {
    388   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    389 }
    390 
    391 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
    392 #if !defined(OS_ANDROID)
    393 bool LowerCaseEqualsASCII(const char* a_begin,
    394                           const char* a_end,
    395                           const char* b) {
    396   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    397 }
    398 
    399 bool LowerCaseEqualsASCII(const char16* a_begin,
    400                           const char16* a_end,
    401                           const char* b) {
    402   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
    403 }
    404 
    405 #endif  // !defined(OS_ANDROID)
    406 
    407 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
    408   if (a.length() != b.length())
    409     return false;
    410   return std::equal(b.begin(), b.end(), a.begin());
    411 }
    412 
    413 bool StartsWithASCII(const std::string& str,
    414                      const std::string& search,
    415                      bool case_sensitive) {
    416   if (case_sensitive)
    417     return str.compare(0, search.length(), search) == 0;
    418   else
    419     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
    420 }
    421 
    422 template <typename STR>
    423 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
    424   if (case_sensitive) {
    425     return str.compare(0, search.length(), search) == 0;
    426   } else {
    427     if (search.size() > str.size())
    428       return false;
    429     return std::equal(search.begin(), search.end(), str.begin(),
    430                       base::CaseInsensitiveCompare<typename STR::value_type>());
    431   }
    432 }
    433 
    434 bool StartsWith(const string16& str, const string16& search,
    435                 bool case_sensitive) {
    436   return StartsWithT(str, search, case_sensitive);
    437 }
    438 
    439 template <typename STR>
    440 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
    441   size_t str_length = str.length();
    442   size_t search_length = search.length();
    443   if (search_length > str_length)
    444     return false;
    445   if (case_sensitive)
    446     return str.compare(str_length - search_length, search_length, search) == 0;
    447   return std::equal(search.begin(), search.end(),
    448                     str.begin() + (str_length - search_length),
    449                     base::CaseInsensitiveCompare<typename STR::value_type>());
    450 }
    451 
    452 bool EndsWith(const std::string& str, const std::string& search,
    453               bool case_sensitive) {
    454   return EndsWithT(str, search, case_sensitive);
    455 }
    456 
    457 bool EndsWith(const string16& str, const string16& search,
    458               bool case_sensitive) {
    459   return EndsWithT(str, search, case_sensitive);
    460 }
    461 
// Unit suffixes used by FormatBytesUnlocalized, indexed by how many times the
// byte count was divided by 1024. Each suffix carries its own leading space.
static const char* const kByteStringsUnlocalized[] = {
  " B",
  " kB",
  " MB",
  " GB",
  " TB",
  " PB"
};
    470 
    471 string16 FormatBytesUnlocalized(int64 bytes) {
    472   double unit_amount = static_cast<double>(bytes);
    473   size_t dimension = 0;
    474   const int kKilo = 1024;
    475   while (unit_amount >= kKilo &&
    476          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    477     unit_amount /= kKilo;
    478     dimension++;
    479   }
    480 
    481   char buf[64];
    482   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    483     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    484                    kByteStringsUnlocalized[dimension]);
    485   } else {
    486     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    487                    kByteStringsUnlocalized[dimension]);
    488   }
    489 
    490   return base::ASCIIToUTF16(buf);
    491 }
    492 
    493 template<class StringType>
    494 void DoReplaceSubstringsAfterOffset(StringType* str,
    495                                     size_t start_offset,
    496                                     const StringType& find_this,
    497                                     const StringType& replace_with,
    498                                     bool replace_all) {
    499   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    500     return;
    501 
    502   DCHECK(!find_this.empty());
    503   for (size_t offs(str->find(find_this, start_offset));
    504       offs != StringType::npos; offs = str->find(find_this, offs)) {
    505     str->replace(offs, find_this.length(), replace_with);
    506     offs += replace_with.length();
    507 
    508     if (!replace_all)
    509       break;
    510   }
    511 }
    512 
    513 void ReplaceFirstSubstringAfterOffset(string16* str,
    514                                       size_t start_offset,
    515                                       const string16& find_this,
    516                                       const string16& replace_with) {
    517   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    518                                  false);  // replace first instance
    519 }
    520 
    521 void ReplaceFirstSubstringAfterOffset(std::string* str,
    522                                       size_t start_offset,
    523                                       const std::string& find_this,
    524                                       const std::string& replace_with) {
    525   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    526                                  false);  // replace first instance
    527 }
    528 
    529 void ReplaceSubstringsAfterOffset(string16* str,
    530                                   size_t start_offset,
    531                                   const string16& find_this,
    532                                   const string16& replace_with) {
    533   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    534                                  true);  // replace all instances
    535 }
    536 
    537 void ReplaceSubstringsAfterOffset(std::string* str,
    538                                   size_t start_offset,
    539                                   const std::string& find_this,
    540                                   const std::string& replace_with) {
    541   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
    542                                  true);  // replace all instances
    543 }
    544 
    545 
    546 template<typename STR>
    547 static size_t TokenizeT(const STR& str,
    548                         const STR& delimiters,
    549                         std::vector<STR>* tokens) {
    550   tokens->clear();
    551 
    552   size_t start = str.find_first_not_of(delimiters);
    553   while (start != STR::npos) {
    554     size_t end = str.find_first_of(delimiters, start + 1);
    555     if (end == STR::npos) {
    556       tokens->push_back(str.substr(start));
    557       break;
    558     } else {
    559       tokens->push_back(str.substr(start, end - start));
    560       start = str.find_first_not_of(delimiters, end + 1);
    561     }
    562   }
    563 
    564   return tokens->size();
    565 }
    566 
    567 size_t Tokenize(const string16& str,
    568                 const string16& delimiters,
    569                 std::vector<string16>* tokens) {
    570   return TokenizeT(str, delimiters, tokens);
    571 }
    572 
    573 size_t Tokenize(const std::string& str,
    574                 const std::string& delimiters,
    575                 std::vector<std::string>* tokens) {
    576   return TokenizeT(str, delimiters, tokens);
    577 }
    578 
    579 size_t Tokenize(const base::StringPiece& str,
    580                 const base::StringPiece& delimiters,
    581                 std::vector<base::StringPiece>* tokens) {
    582   return TokenizeT(str, delimiters, tokens);
    583 }
    584 
    585 template<typename STR>
    586 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
    587   if (parts.empty())
    588     return STR();
    589 
    590   STR result(parts[0]);
    591   typename std::vector<STR>::const_iterator iter = parts.begin();
    592   ++iter;
    593 
    594   for (; iter != parts.end(); ++iter) {
    595     result += sep;
    596     result += *iter;
    597   }
    598 
    599   return result;
    600 }
    601 
    602 std::string JoinString(const std::vector<std::string>& parts, char sep) {
    603   return JoinStringT(parts, std::string(1, sep));
    604 }
    605 
    606 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
    607   return JoinStringT(parts, string16(1, sep));
    608 }
    609 
    610 std::string JoinString(const std::vector<std::string>& parts,
    611                        const std::string& separator) {
    612   return JoinStringT(parts, separator);
    613 }
    614 
    615 string16 JoinString(const std::vector<string16>& parts,
    616                     const string16& separator) {
    617   return JoinStringT(parts, separator);
    618 }
    619 
    620 template<class FormatStringType, class OutStringType>
    621 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
    622     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
    623   size_t substitutions = subst.size();
    624 
    625   size_t sub_length = 0;
    626   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
    627        iter != subst.end(); ++iter) {
    628     sub_length += iter->length();
    629   }
    630 
    631   OutStringType formatted;
    632   formatted.reserve(format_string.length() + sub_length);
    633 
    634   std::vector<ReplacementOffset> r_offsets;
    635   for (typename FormatStringType::const_iterator i = format_string.begin();
    636        i != format_string.end(); ++i) {
    637     if ('$' == *i) {
    638       if (i + 1 != format_string.end()) {
    639         ++i;
    640         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
    641         if ('$' == *i) {
    642           while (i != format_string.end() && '$' == *i) {
    643             formatted.push_back('$');
    644             ++i;
    645           }
    646           --i;
    647         } else {
    648           uintptr_t index = 0;
    649           while (i != format_string.end() && '0' <= *i && *i <= '9') {
    650             index *= 10;
    651             index += *i - '0';
    652             ++i;
    653           }
    654           --i;
    655           index -= 1;
    656           if (offsets) {
    657             ReplacementOffset r_offset(index,
    658                 static_cast<int>(formatted.size()));
    659             r_offsets.insert(std::lower_bound(r_offsets.begin(),
    660                                               r_offsets.end(),
    661                                               r_offset,
    662                                               &CompareParameter),
    663                              r_offset);
    664           }
    665           if (index < substitutions)
    666             formatted.append(subst.at(index));
    667         }
    668       }
    669     } else {
    670       formatted.push_back(*i);
    671     }
    672   }
    673   if (offsets) {
    674     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
    675          i != r_offsets.end(); ++i) {
    676       offsets->push_back(i->offset);
    677     }
    678   }
    679   return formatted;
    680 }
    681 
    682 string16 ReplaceStringPlaceholders(const string16& format_string,
    683                                    const std::vector<string16>& subst,
    684                                    std::vector<size_t>* offsets) {
    685   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    686 }
    687 
    688 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
    689                                       const std::vector<std::string>& subst,
    690                                       std::vector<size_t>* offsets) {
    691   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    692 }
    693 
    694 string16 ReplaceStringPlaceholders(const string16& format_string,
    695                                    const string16& a,
    696                                    size_t* offset) {
    697   std::vector<size_t> offsets;
    698   std::vector<string16> subst;
    699   subst.push_back(a);
    700   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
    701 
    702   DCHECK_EQ(1U, offsets.size());
    703   if (offset)
    704     *offset = offsets[0];
    705   return result;
    706 }
    707 
    708 static bool IsWildcard(base_icu::UChar32 character) {
    709   return character == '*' || character == '?';
    710 }
    711 
// Move the strings pointers to the point where they start to differ.
//
// Advances |*pattern| and |*string| in lock step over characters that match,
// decoding one character at a time with |next|. Stops when either pointer
// reaches its end, at an unescaped wildcard in the pattern, or at the first
// mismatch. A backslash in the pattern escapes the character that follows it,
// so an escaped wildcard is compared literally.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                         const CHAR** string, const CHAR* string_end,
                         NEXT next) {
  // Non-NULL while the previous pattern character was an unescaped backslash;
  // points at that backslash so it can be restored on a mismatch.
  const CHAR* escape = NULL;
  while (*pattern != pattern_end && *string != string_end) {
    if (!escape && IsWildcard(**pattern)) {
      // We don't want to match wildcard here, except if it's escaped.
      return;
    }

    // Check if the escapement char is found. If so, skip it and move to the
    // next character.
    if (!escape && **pattern == '\\') {
      escape = *pattern;
      next(pattern, pattern_end);
      continue;
    }

    // Check if the chars match, if so, increment the ptrs.
    // Decode into temporaries first so the real pointers only move on a match.
    const CHAR* pattern_next = *pattern;
    const CHAR* string_next = *string;
    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    // A CBU_SENTINEL result is never treated as a match.
    if (pattern_char == next(&string_next, string_end) &&
        pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
      *pattern = pattern_next;
      *string = string_next;
    } else {
      // Uh oh, it did not match, we are done. If the last char was an
      // escapement, that means that it was an error to advance the ptr here,
      // let's put it back where it was. This also means that the MatchPattern
      // function will return false because if we can't match an escape char
      // here, then no one will.
      if (escape) {
        *pattern = escape;
      }
      return;
    }

    // The escape only applies to the single character just consumed.
    escape = NULL;
  }
}
    755 
    756 template <typename CHAR, typename NEXT>
    757 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
    758   while (*pattern != end) {
    759     if (!IsWildcard(**pattern))
    760       return;
    761     next(pattern, end);
    762   }
    763 }
    764 
// Recursive wildcard matcher. Returns true if the whole of [eval, eval_end)
// matches the whole of [pattern, pattern_end).
//
// As implemented here, '*' matches any run of characters (including none) and
// '?' matches either nothing or exactly one character; backslash escaping is
// handled by EatSameChars. |next| decodes and steps over one character.
// |depth| is the current recursion depth; once it exceeds kMaxDepth the match
// is abandoned and false is returned.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contains
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // If this is a question mark, then we need to compare the rest with
  // the current string or the string with one character eaten.
  // |next_pattern| is the pattern with its first character skipped.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);
  if (pattern[0] == '?') {
    // Try '?' matching nothing...
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    // ...then '?' matching exactly one character.
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Try every suffix of the string against the rest of the pattern. Note
    // that |eval| advances one CHAR unit at a time here, not via |next|.
    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      eval++;
    }

    // We reached the end of the string, let see if the pattern contains only
    // wildcards.
    if (eval == eval_end) {
      EatWildcard(&pattern, pattern_end, next);
      if (pattern != pattern_end)
        return false;
      return true;
    }
  }

  return false;
}
    829 
    830 struct NextCharUTF8 {
    831   base_icu::UChar32 operator()(const char** p, const char* end) {
    832     base_icu::UChar32 c;
    833     int offset = 0;
    834     CBU8_NEXT(*p, offset, end - *p, c);
    835     *p += offset;
    836     return c;
    837   }
    838 };
    839 
    840 struct NextCharUTF16 {
    841   base_icu::UChar32 operator()(const char16** p, const char16* end) {
    842     base_icu::UChar32 c;
    843     int offset = 0;
    844     CBU16_NEXT(*p, offset, end - *p, c);
    845     *p += offset;
    846     return c;
    847   }
    848 };
    849 
    850 bool MatchPattern(const base::StringPiece& eval,
    851                   const base::StringPiece& pattern) {
    852   return MatchPatternT(eval.data(), eval.data() + eval.size(),
    853                        pattern.data(), pattern.data() + pattern.size(),
    854                        0, NextCharUTF8());
    855 }
    856 
    857 bool MatchPattern(const string16& eval, const string16& pattern) {
    858   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
    859                        pattern.c_str(), pattern.c_str() + pattern.size(),
    860                        0, NextCharUTF16());
    861 }
    862 
    863 // The following code is compatible with the OpenBSD lcpy interface.  See:
    864 //   http://www.gratisoft.us/todd/papers/strlcpy.html
    865 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
    866 
    867 namespace {
    868 
    869 template <typename CHAR>
    870 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
    871   for (size_t i = 0; i < dst_size; ++i) {
    872     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
    873       return i;
    874   }
    875 
    876   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
    877   if (dst_size != 0)
    878     dst[dst_size - 1] = 0;
    879 
    880   // Count the rest of the |src|, and return it's length in characters.
    881   while (src[dst_size]) ++dst_size;
    882   return dst_size;
    883 }
    884 
    885 }  // namespace
    886 
    887 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
    888   return lcpyT<char>(dst, src, dst_size);
    889 }
    890 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
    891   return lcpyT<wchar_t>(dst, src, dst_size);
    892 }
    893