Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdint.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 #include <time.h>
     16 #include <wchar.h>
     17 #include <wctype.h>
     18 
     19 #include <algorithm>
     20 #include <limits>
     21 #include <vector>
     22 
     23 #include "base/logging.h"
     24 #include "base/macros.h"
     25 #include "base/memory/singleton.h"
     26 #include "base/strings/utf_string_conversion_utils.h"
     27 #include "base/strings/utf_string_conversions.h"
     28 #include "base/third_party/icu/icu_utf.h"
     29 #include "build/build_config.h"
     30 
     31 namespace base {
     32 
     33 namespace {
     34 
     35 // Force the singleton used by EmptyString[16] to be a unique type. This
     36 // prevents other code that might accidentally use Singleton<string> from
     37 // getting our internal one.
     38 struct EmptyStrings {
     39   EmptyStrings() = default;
     40   const std::string s;
     41   const string16 s16;
     42 
     43   static EmptyStrings* GetInstance() {
     44     return Singleton<EmptyStrings>::get();
     45   }
     46 };
     47 
     48 // Used by ReplaceStringPlaceholders to track the position in the string of
     49 // replaced parameters.
     50 struct ReplacementOffset {
     51   ReplacementOffset(uintptr_t parameter, size_t offset)
     52       : parameter(parameter),
     53         offset(offset) {}
     54 
     55   // Index of the parameter.
     56   uintptr_t parameter;
     57 
     58   // Starting position in the string.
     59   size_t offset;
     60 };
     61 
     62 static bool CompareParameter(const ReplacementOffset& elem1,
     63                              const ReplacementOffset& elem2) {
     64   return elem1.parameter < elem2.parameter;
     65 }
     66 
     67 // Overloaded function to append one string onto the end of another. Having a
     68 // separate overload for |source| as both string and StringPiece allows for more
     69 // efficient usage from functions templated to work with either type (avoiding a
     70 // redundant call to the BasicStringPiece constructor in both cases).
     71 template <typename string_type>
     72 inline void AppendToString(string_type* target, const string_type& source) {
     73   target->append(source);
     74 }
     75 
     76 template <typename string_type>
     77 inline void AppendToString(string_type* target,
     78                            const BasicStringPiece<string_type>& source) {
     79   source.AppendToString(target);
     80 }
     81 
     82 // Assuming that a pointer is the size of a "machine word", then
     83 // uintptr_t is an integer type that is also a machine word.
     84 typedef uintptr_t MachineWord;
     85 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
     86 
     87 inline bool IsAlignedToMachineWord(const void* pointer) {
     88   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
     89 }
     90 
     91 template<typename T> inline T* AlignToMachineWord(T* pointer) {
     92   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
     93                               ~kMachineWordAlignmentMask);
     94 }
     95 
     96 template<size_t size, typename CharacterType> struct NonASCIIMask;
     97 template<> struct NonASCIIMask<4, char16> {
     98     static inline uint32_t value() { return 0xFF80FF80U; }
     99 };
    100 template<> struct NonASCIIMask<4, char> {
    101     static inline uint32_t value() { return 0x80808080U; }
    102 };
    103 template<> struct NonASCIIMask<8, char16> {
    104     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
    105 };
    106 template<> struct NonASCIIMask<8, char> {
    107     static inline uint64_t value() { return 0x8080808080808080ULL; }
    108 };
    109 #if defined(WCHAR_T_IS_UTF32)
    110 template<> struct NonASCIIMask<4, wchar_t> {
    111     static inline uint32_t value() { return 0xFFFFFF80U; }
    112 };
    113 template<> struct NonASCIIMask<8, wchar_t> {
    114     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
    115 };
    116 #endif  // WCHAR_T_IS_UTF32
    117 
    118 }  // namespace
    119 
    120 bool IsWprintfFormatPortable(const wchar_t* format) {
    121   for (const wchar_t* position = format; *position != '\0'; ++position) {
    122     if (*position == '%') {
    123       bool in_specification = true;
    124       bool modifier_l = false;
    125       while (in_specification) {
    126         // Eat up characters until reaching a known specifier.
    127         if (*++position == '\0') {
    128           // The format string ended in the middle of a specification.  Call
    129           // it portable because no unportable specifications were found.  The
    130           // string is equally broken on all platforms.
    131           return true;
    132         }
    133 
    134         if (*position == 'l') {
    135           // 'l' is the only thing that can save the 's' and 'c' specifiers.
    136           modifier_l = true;
    137         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
    138                    *position == 'S' || *position == 'C' || *position == 'F' ||
    139                    *position == 'D' || *position == 'O' || *position == 'U') {
    140           // Not portable.
    141           return false;
    142         }
    143 
    144         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
    145           // Portable, keep scanning the rest of the format string.
    146           in_specification = false;
    147         }
    148       }
    149     }
    150   }
    151 
    152   return true;
    153 }
    154 
    155 namespace {
    156 
    157 template<typename StringType>
    158 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
    159   StringType ret;
    160   ret.reserve(str.size());
    161   for (size_t i = 0; i < str.size(); i++)
    162     ret.push_back(ToLowerASCII(str[i]));
    163   return ret;
    164 }
    165 
    166 template<typename StringType>
    167 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
    168   StringType ret;
    169   ret.reserve(str.size());
    170   for (size_t i = 0; i < str.size(); i++)
    171     ret.push_back(ToUpperASCII(str[i]));
    172   return ret;
    173 }
    174 
    175 }  // namespace
    176 
    177 std::string ToLowerASCII(StringPiece str) {
    178   return ToLowerASCIIImpl<std::string>(str);
    179 }
    180 
    181 string16 ToLowerASCII(StringPiece16 str) {
    182   return ToLowerASCIIImpl<string16>(str);
    183 }
    184 
    185 std::string ToUpperASCII(StringPiece str) {
    186   return ToUpperASCIIImpl<std::string>(str);
    187 }
    188 
    189 string16 ToUpperASCII(StringPiece16 str) {
    190   return ToUpperASCIIImpl<string16>(str);
    191 }
    192 
    193 template<class StringType>
    194 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
    195                                  BasicStringPiece<StringType> b) {
    196   // Find the first characters that aren't equal and compare them.  If the end
    197   // of one of the strings is found before a nonequal character, the lengths
    198   // of the strings are compared.
    199   size_t i = 0;
    200   while (i < a.length() && i < b.length()) {
    201     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
    202     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
    203     if (lower_a < lower_b)
    204       return -1;
    205     if (lower_a > lower_b)
    206       return 1;
    207     i++;
    208   }
    209 
    210   // End of one string hit before finding a different character. Expect the
    211   // common case to be "strings equal" at this point so check that first.
    212   if (a.length() == b.length())
    213     return 0;
    214 
    215   if (a.length() < b.length())
    216     return -1;
    217   return 1;
    218 }
    219 
    220 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    221   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
    222 }
    223 
    224 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    225   return CompareCaseInsensitiveASCIIT<string16>(a, b);
    226 }
    227 
    228 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    229   if (a.length() != b.length())
    230     return false;
    231   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
    232 }
    233 
    234 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    235   if (a.length() != b.length())
    236     return false;
    237   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
    238 }
    239 
    240 const std::string& EmptyString() {
    241   return EmptyStrings::GetInstance()->s;
    242 }
    243 
    244 const string16& EmptyString16() {
    245   return EmptyStrings::GetInstance()->s16;
    246 }
    247 
    248 template <class StringType>
    249 bool ReplaceCharsT(const StringType& input,
    250                    BasicStringPiece<StringType> find_any_of_these,
    251                    BasicStringPiece<StringType> replace_with,
    252                    StringType* output);
    253 
    254 bool ReplaceChars(const string16& input,
    255                   StringPiece16 replace_chars,
    256                   const string16& replace_with,
    257                   string16* output) {
    258   return ReplaceCharsT(input, replace_chars, StringPiece16(replace_with),
    259                        output);
    260 }
    261 
    262 bool ReplaceChars(const std::string& input,
    263                   StringPiece replace_chars,
    264                   const std::string& replace_with,
    265                   std::string* output) {
    266   return ReplaceCharsT(input, replace_chars, StringPiece(replace_with), output);
    267 }
    268 
    269 bool RemoveChars(const string16& input,
    270                  StringPiece16 remove_chars,
    271                  string16* output) {
    272   return ReplaceCharsT(input, remove_chars, StringPiece16(), output);
    273 }
    274 
    275 bool RemoveChars(const std::string& input,
    276                  StringPiece remove_chars,
    277                  std::string* output) {
    278   return ReplaceCharsT(input, remove_chars, StringPiece(), output);
    279 }
    280 
    281 template<typename Str>
    282 TrimPositions TrimStringT(const Str& input,
    283                           BasicStringPiece<Str> trim_chars,
    284                           TrimPositions positions,
    285                           Str* output) {
    286   // Find the edges of leading/trailing whitespace as desired. Need to use
    287   // a StringPiece version of input to be able to call find* on it with the
    288   // StringPiece version of trim_chars (normally the trim_chars will be a
    289   // constant so avoid making a copy).
    290   BasicStringPiece<Str> input_piece(input);
    291   const size_t last_char = input.length() - 1;
    292   const size_t first_good_char = (positions & TRIM_LEADING) ?
    293       input_piece.find_first_not_of(trim_chars) : 0;
    294   const size_t last_good_char = (positions & TRIM_TRAILING) ?
    295       input_piece.find_last_not_of(trim_chars) : last_char;
    296 
    297   // When the string was all trimmed, report that we stripped off characters
    298   // from whichever position the caller was interested in. For empty input, we
    299   // stripped no characters, but we still need to clear |output|.
    300   if (input.empty() ||
    301       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
    302     bool input_was_empty = input.empty();  // in case output == &input
    303     output->clear();
    304     return input_was_empty ? TRIM_NONE : positions;
    305   }
    306 
    307   // Trim.
    308   *output =
    309       input.substr(first_good_char, last_good_char - first_good_char + 1);
    310 
    311   // Return where we trimmed from.
    312   return static_cast<TrimPositions>(
    313       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    314       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    315 }
    316 
    317 bool TrimString(const string16& input,
    318                 StringPiece16 trim_chars,
    319                 string16* output) {
    320   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    321 }
    322 
    323 bool TrimString(const std::string& input,
    324                 StringPiece trim_chars,
    325                 std::string* output) {
    326   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    327 }
    328 
    329 template<typename Str>
    330 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
    331                                        BasicStringPiece<Str> trim_chars,
    332                                        TrimPositions positions) {
    333   size_t begin = (positions & TRIM_LEADING) ?
    334       input.find_first_not_of(trim_chars) : 0;
    335   size_t end = (positions & TRIM_TRAILING) ?
    336       input.find_last_not_of(trim_chars) + 1 : input.size();
    337   return input.substr(begin, end - begin);
    338 }
    339 
    340 StringPiece16 TrimString(StringPiece16 input,
    341                          StringPiece16 trim_chars,
    342                          TrimPositions positions) {
    343   return TrimStringPieceT(input, trim_chars, positions);
    344 }
    345 
    346 StringPiece TrimString(StringPiece input,
    347                        StringPiece trim_chars,
    348                        TrimPositions positions) {
    349   return TrimStringPieceT(input, trim_chars, positions);
    350 }
    351 
    352 void TruncateUTF8ToByteSize(const std::string& input,
    353                             const size_t byte_size,
    354                             std::string* output) {
    355   DCHECK(output);
    356   if (byte_size > input.length()) {
    357     *output = input;
    358     return;
    359   }
    360   DCHECK_LE(byte_size,
    361             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
    362   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
    363   int32_t truncation_length = static_cast<int32_t>(byte_size);
    364   int32_t char_index = truncation_length - 1;
    365   const char* data = input.data();
    366 
    367   // Using CBU8, we will move backwards from the truncation point
    368   // to the beginning of the string looking for a valid UTF8
    369   // character.  Once a full UTF8 character is found, we will
    370   // truncate the string to the end of that character.
    371   while (char_index >= 0) {
    372     int32_t prev = char_index;
    373     base_icu::UChar32 code_point = 0;
    374     CBU8_NEXT(data, char_index, truncation_length, code_point);
    375     if (!IsValidCharacter(code_point) ||
    376         !IsValidCodepoint(code_point)) {
    377       char_index = prev - 1;
    378     } else {
    379       break;
    380     }
    381   }
    382 
    383   if (char_index >= 0 )
    384     *output = input.substr(0, char_index);
    385   else
    386     output->clear();
    387 }
    388 
    389 TrimPositions TrimWhitespace(const string16& input,
    390                              TrimPositions positions,
    391                              string16* output) {
    392   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
    393 }
    394 
    395 StringPiece16 TrimWhitespace(StringPiece16 input,
    396                              TrimPositions positions) {
    397   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
    398 }
    399 
    400 TrimPositions TrimWhitespaceASCII(const std::string& input,
    401                                   TrimPositions positions,
    402                                   std::string* output) {
    403   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
    404 }
    405 
    406 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
    407   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
    408 }
    409 
    410 template<typename STR>
    411 STR CollapseWhitespaceT(const STR& text,
    412                         bool trim_sequences_with_line_breaks) {
    413   STR result;
    414   result.resize(text.size());
    415 
    416   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    417   // will trim any leading whitespace.
    418   bool in_whitespace = true;
    419   bool already_trimmed = true;
    420 
    421   int chars_written = 0;
    422   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    423     if (IsUnicodeWhitespace(*i)) {
    424       if (!in_whitespace) {
    425         // Reduce all whitespace sequences to a single space.
    426         in_whitespace = true;
    427         result[chars_written++] = L' ';
    428       }
    429       if (trim_sequences_with_line_breaks && !already_trimmed &&
    430           ((*i == '\n') || (*i == '\r'))) {
    431         // Whitespace sequences containing CR or LF are eliminated entirely.
    432         already_trimmed = true;
    433         --chars_written;
    434       }
    435     } else {
    436       // Non-whitespace chracters are copied straight across.
    437       in_whitespace = false;
    438       already_trimmed = false;
    439       result[chars_written++] = *i;
    440     }
    441   }
    442 
    443   if (in_whitespace && !already_trimmed) {
    444     // Any trailing whitespace is eliminated.
    445     --chars_written;
    446   }
    447 
    448   result.resize(chars_written);
    449   return result;
    450 }
    451 
    452 string16 CollapseWhitespace(const string16& text,
    453                             bool trim_sequences_with_line_breaks) {
    454   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    455 }
    456 
    457 std::string CollapseWhitespaceASCII(const std::string& text,
    458                                     bool trim_sequences_with_line_breaks) {
    459   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    460 }
    461 
    462 bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
    463   return input.find_first_not_of(characters) == StringPiece::npos;
    464 }
    465 
    466 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
    467   return input.find_first_not_of(characters) == StringPiece16::npos;
    468 }
    469 
    470 template <class Char>
    471 inline bool DoIsStringASCII(const Char* characters, size_t length) {
    472   MachineWord all_char_bits = 0;
    473   const Char* end = characters + length;
    474 
    475   // Prologue: align the input.
    476   while (!IsAlignedToMachineWord(characters) && characters != end) {
    477     all_char_bits |= *characters;
    478     ++characters;
    479   }
    480 
    481   // Compare the values of CPU word size.
    482   const Char* word_end = AlignToMachineWord(end);
    483   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
    484   while (characters < word_end) {
    485     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    486     characters += loop_increment;
    487   }
    488 
    489   // Process the remaining bytes.
    490   while (characters != end) {
    491     all_char_bits |= *characters;
    492     ++characters;
    493   }
    494 
    495   MachineWord non_ascii_bit_mask =
    496       NonASCIIMask<sizeof(MachineWord), Char>::value();
    497   return !(all_char_bits & non_ascii_bit_mask);
    498 }
    499 
    500 bool IsStringASCII(StringPiece str) {
    501   return DoIsStringASCII(str.data(), str.length());
    502 }
    503 
    504 bool IsStringASCII(StringPiece16 str) {
    505   return DoIsStringASCII(str.data(), str.length());
    506 }
    507 
    508 #if defined(WCHAR_T_IS_UTF32)
    509 bool IsStringASCII(WStringPiece str) {
    510   return DoIsStringASCII(str.data(), str.length());
    511 }
    512 #endif
    513 
    514 bool IsStringUTF8(StringPiece str) {
    515   const char *src = str.data();
    516   int32_t src_len = static_cast<int32_t>(str.length());
    517   int32_t char_index = 0;
    518 
    519   while (char_index < src_len) {
    520     int32_t code_point;
    521     CBU8_NEXT(src, char_index, src_len, code_point);
    522     if (!IsValidCharacter(code_point))
    523       return false;
    524   }
    525   return true;
    526 }
    527 
    528 // Implementation note: Normally this function will be called with a hardcoded
    529 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
    530 // a C constant requires running strlen, so the result will be two passes
    531 // through the buffers, one to file the length of lowercase_ascii, and one to
    532 // compare each letter.
    533 //
    534 // This function could have taken a const char* to avoid this and only do one
    535 // pass through the string. But the strlen is faster than the case-insensitive
    536 // compares and lets us early-exit in the case that the strings are different
    537 // lengths (will often be the case for non-matches). So whether one approach or
    538 // the other will be faster depends on the case.
    539 //
    540 // The hardcoded strings are typically very short so it doesn't matter, and the
    541 // string piece gives additional flexibility for the caller (doesn't have to be
    542 // null terminated) so we choose the StringPiece route.
    543 template<typename Str>
    544 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
    545                                           StringPiece lowercase_ascii) {
    546   if (str.size() != lowercase_ascii.size())
    547     return false;
    548   for (size_t i = 0; i < str.size(); i++) {
    549     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
    550       return false;
    551   }
    552   return true;
    553 }
    554 
    555 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
    556   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
    557 }
    558 
    559 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
    560   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
    561 }
    562 
    563 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
    564   if (str.length() != ascii.length())
    565     return false;
    566   return std::equal(ascii.begin(), ascii.end(), str.begin());
    567 }
    568 
    569 template<typename Str>
    570 bool StartsWithT(BasicStringPiece<Str> str,
    571                  BasicStringPiece<Str> search_for,
    572                  CompareCase case_sensitivity) {
    573   if (search_for.size() > str.size())
    574     return false;
    575 
    576   BasicStringPiece<Str> source = str.substr(0, search_for.size());
    577 
    578   switch (case_sensitivity) {
    579     case CompareCase::SENSITIVE:
    580       return source == search_for;
    581 
    582     case CompareCase::INSENSITIVE_ASCII:
    583       return std::equal(
    584           search_for.begin(), search_for.end(),
    585           source.begin(),
    586           CaseInsensitiveCompareASCII<typename Str::value_type>());
    587 
    588     default:
    589       NOTREACHED();
    590       return false;
    591   }
    592 }
    593 
    594 bool StartsWith(StringPiece str,
    595                 StringPiece search_for,
    596                 CompareCase case_sensitivity) {
    597   return StartsWithT<std::string>(str, search_for, case_sensitivity);
    598 }
    599 
    600 bool StartsWith(StringPiece16 str,
    601                 StringPiece16 search_for,
    602                 CompareCase case_sensitivity) {
    603   return StartsWithT<string16>(str, search_for, case_sensitivity);
    604 }
    605 
    606 template <typename Str>
    607 bool EndsWithT(BasicStringPiece<Str> str,
    608                BasicStringPiece<Str> search_for,
    609                CompareCase case_sensitivity) {
    610   if (search_for.size() > str.size())
    611     return false;
    612 
    613   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
    614                                             search_for.size());
    615 
    616   switch (case_sensitivity) {
    617     case CompareCase::SENSITIVE:
    618       return source == search_for;
    619 
    620     case CompareCase::INSENSITIVE_ASCII:
    621       return std::equal(
    622           source.begin(), source.end(),
    623           search_for.begin(),
    624           CaseInsensitiveCompareASCII<typename Str::value_type>());
    625 
    626     default:
    627       NOTREACHED();
    628       return false;
    629   }
    630 }
    631 
    632 bool EndsWith(StringPiece str,
    633               StringPiece search_for,
    634               CompareCase case_sensitivity) {
    635   return EndsWithT<std::string>(str, search_for, case_sensitivity);
    636 }
    637 
    638 bool EndsWith(StringPiece16 str,
    639               StringPiece16 search_for,
    640               CompareCase case_sensitivity) {
    641   return EndsWithT<string16>(str, search_for, case_sensitivity);
    642 }
    643 
    644 char HexDigitToInt(wchar_t c) {
    645   DCHECK(IsHexDigit(c));
    646   if (c >= '0' && c <= '9')
    647     return static_cast<char>(c - '0');
    648   if (c >= 'A' && c <= 'F')
    649     return static_cast<char>(c - 'A' + 10);
    650   if (c >= 'a' && c <= 'f')
    651     return static_cast<char>(c - 'a' + 10);
    652   return 0;
    653 }
    654 
    655 bool IsUnicodeWhitespace(wchar_t c) {
    656   // kWhitespaceWide is a NULL-terminated string
    657   for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
    658     if (*cur == c)
    659       return true;
    660   }
    661   return false;
    662 }
    663 
    664 static const char* const kByteStringsUnlocalized[] = {
    665   " B",
    666   " kB",
    667   " MB",
    668   " GB",
    669   " TB",
    670   " PB"
    671 };
    672 
    673 string16 FormatBytesUnlocalized(int64_t bytes) {
    674   double unit_amount = static_cast<double>(bytes);
    675   size_t dimension = 0;
    676   const int kKilo = 1024;
    677   while (unit_amount >= kKilo &&
    678          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    679     unit_amount /= kKilo;
    680     dimension++;
    681   }
    682 
    683   char buf[64];
    684   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    685     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    686                    kByteStringsUnlocalized[dimension]);
    687   } else {
    688     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    689                    kByteStringsUnlocalized[dimension]);
    690   }
    691 
    692   return ASCIIToUTF16(buf);
    693 }
    694 
    695 // A Matcher for DoReplaceMatchesAfterOffset() that matches substrings.
    696 template <class StringType>
    697 struct SubstringMatcher {
    698   BasicStringPiece<StringType> find_this;
    699 
    700   size_t Find(const StringType& input, size_t pos) {
    701     return input.find(find_this.data(), pos, find_this.length());
    702   }
    703   size_t MatchSize() { return find_this.length(); }
    704 };
    705 
    706 // A Matcher for DoReplaceMatchesAfterOffset() that matches single characters.
    707 template <class StringType>
    708 struct CharacterMatcher {
    709   BasicStringPiece<StringType> find_any_of_these;
    710 
    711   size_t Find(const StringType& input, size_t pos) {
    712     return input.find_first_of(find_any_of_these.data(), pos,
    713                                find_any_of_these.length());
    714   }
    715   constexpr size_t MatchSize() { return 1; }
    716 };
    717 
    718 enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST };
    719 
    720 // Runs in O(n) time in the length of |str|, and transforms the string without
    721 // reallocating when possible. Returns |true| if any matches were found.
    722 //
    723 // This is parameterized on a |Matcher| traits type, so that it can be the
    724 // implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset().
    725 template <class StringType, class Matcher>
    726 bool DoReplaceMatchesAfterOffset(StringType* str,
    727                                  size_t initial_offset,
    728                                  Matcher matcher,
    729                                  BasicStringPiece<StringType> replace_with,
    730                                  ReplaceType replace_type) {
    731   using CharTraits = typename StringType::traits_type;
    732 
    733   const size_t find_length = matcher.MatchSize();
    734   if (!find_length)
    735     return false;
    736 
    737   // If the find string doesn't appear, there's nothing to do.
    738   size_t first_match = matcher.Find(*str, initial_offset);
    739   if (first_match == StringType::npos)
    740     return false;
    741 
    742   // If we're only replacing one instance, there's no need to do anything
    743   // complicated.
    744   const size_t replace_length = replace_with.length();
    745   if (replace_type == ReplaceType::REPLACE_FIRST) {
    746     str->replace(first_match, find_length, replace_with.data(), replace_length);
    747     return true;
    748   }
    749 
    750   // If the find and replace strings are the same length, we can simply use
    751   // replace() on each instance, and finish the entire operation in O(n) time.
    752   if (find_length == replace_length) {
    753     auto* buffer = &((*str)[0]);
    754     for (size_t offset = first_match; offset != StringType::npos;
    755          offset = matcher.Find(*str, offset + replace_length)) {
    756       CharTraits::copy(buffer + offset, replace_with.data(), replace_length);
    757     }
    758     return true;
    759   }
    760 
    761   // Since the find and replace strings aren't the same length, a loop like the
    762   // one above would be O(n^2) in the worst case, as replace() will shift the
    763   // entire remaining string each time. We need to be more clever to keep things
    764   // O(n).
    765   //
    766   // When the string is being shortened, it's possible to just shift the matches
    767   // down in one pass while finding, and truncate the length at the end of the
    768   // search.
    769   //
    770   // If the string is being lengthened, more work is required. The strategy used
    771   // here is to make two find() passes through the string. The first pass counts
    772   // the number of matches to determine the new size. The second pass will
    773   // either construct the new string into a new buffer (if the existing buffer
    774   // lacked capacity), or else -- if there is room -- create a region of scratch
    775   // space after |first_match| by shifting the tail of the string to a higher
    776   // index, and doing in-place moves from the tail to lower indices thereafter.
    777   size_t str_length = str->length();
    778   size_t expansion = 0;
    779   if (replace_length > find_length) {
    780     // This operation lengthens the string; determine the new length by counting
    781     // matches.
    782     const size_t expansion_per_match = (replace_length - find_length);
    783     size_t num_matches = 0;
    784     for (size_t match = first_match; match != StringType::npos;
    785          match = matcher.Find(*str, match + find_length)) {
    786       expansion += expansion_per_match;
    787       ++num_matches;
    788     }
    789     const size_t final_length = str_length + expansion;
    790 
    791     if (str->capacity() < final_length) {
    792       // If we'd have to allocate a new buffer to grow the string, build the
    793       // result directly into the new allocation via append().
    794       StringType src(str->get_allocator());
    795       str->swap(src);
    796       str->reserve(final_length);
    797 
    798       size_t pos = 0;
    799       for (size_t match = first_match;; match = matcher.Find(src, pos)) {
    800         str->append(src, pos, match - pos);
    801         str->append(replace_with.data(), replace_length);
    802         pos = match + find_length;
    803 
    804         // A mid-loop test/break enables skipping the final Find() call; the
    805         // number of matches is known, so don't search past the last one.
    806         if (!--num_matches)
    807           break;
    808       }
    809 
    810       // Handle substring after the final match.
    811       str->append(src, pos, str_length - pos);
    812       return true;
    813     }
    814 
    815     // Prepare for the copy/move loop below -- expand the string to its final
    816     // size by shifting the data after the first match to the end of the resized
    817     // string.
    818     size_t shift_src = first_match + find_length;
    819     size_t shift_dst = shift_src + expansion;
    820 
    821     // Big |expansion| factors (relative to |str_length|) require padding up to
    822     // |shift_dst|.
    823     if (shift_dst > str_length)
    824       str->resize(shift_dst);
    825 
    826     str->replace(shift_dst, str_length - shift_src, *str, shift_src,
    827                  str_length - shift_src);
    828     str_length = final_length;
    829   }
    830 
    831   // We can alternate replacement and move operations. This won't overwrite the
    832   // unsearched region of the string so long as |write_offset| <= |read_offset|;
    833   // that condition is always satisfied because:
    834   //
    835   //   (a) If the string is being shortened, |expansion| is zero and
    836   //       |write_offset| grows slower than |read_offset|.
    837   //
    838   //   (b) If the string is being lengthened, |write_offset| grows faster than
    839   //       |read_offset|, but |expansion| is big enough so that |write_offset|
    840   //       will only catch up to |read_offset| at the point of the last match.
    841   auto* buffer = &((*str)[0]);
    842   size_t write_offset = first_match;
    843   size_t read_offset = first_match + expansion;
    844   do {
    845     if (replace_length) {
    846       CharTraits::copy(buffer + write_offset, replace_with.data(),
    847                        replace_length);
    848       write_offset += replace_length;
    849     }
    850     read_offset += find_length;
    851 
    852     // min() clamps StringType::npos (the largest unsigned value) to str_length.
    853     size_t match = std::min(matcher.Find(*str, read_offset), str_length);
    854 
    855     size_t length = match - read_offset;
    856     if (length) {
    857       CharTraits::move(buffer + write_offset, buffer + read_offset, length);
    858       write_offset += length;
    859       read_offset += length;
    860     }
    861   } while (read_offset < str_length);
    862 
    863   // If we're shortening the string, truncate it now.
    864   str->resize(write_offset);
    865   return true;
    866 }
    867 
    868 template <class StringType>
    869 bool ReplaceCharsT(const StringType& input,
    870                    BasicStringPiece<StringType> find_any_of_these,
    871                    BasicStringPiece<StringType> replace_with,
    872                    StringType* output) {
    873   // Commonly, this is called with output and input being the same string; in
    874   // that case, this assignment is inexpensive.
    875   *output = input;
    876 
    877   return DoReplaceMatchesAfterOffset(
    878       output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with,
    879       ReplaceType::REPLACE_ALL);
    880 }
    881 
    882 void ReplaceFirstSubstringAfterOffset(string16* str,
    883                                       size_t start_offset,
    884                                       StringPiece16 find_this,
    885                                       StringPiece16 replace_with) {
    886   DoReplaceMatchesAfterOffset(str, start_offset,
    887                               SubstringMatcher<string16>{find_this},
    888                               replace_with, ReplaceType::REPLACE_FIRST);
    889 }
    890 
    891 void ReplaceFirstSubstringAfterOffset(std::string* str,
    892                                       size_t start_offset,
    893                                       StringPiece find_this,
    894                                       StringPiece replace_with) {
    895   DoReplaceMatchesAfterOffset(str, start_offset,
    896                               SubstringMatcher<std::string>{find_this},
    897                               replace_with, ReplaceType::REPLACE_FIRST);
    898 }
    899 
    900 void ReplaceSubstringsAfterOffset(string16* str,
    901                                   size_t start_offset,
    902                                   StringPiece16 find_this,
    903                                   StringPiece16 replace_with) {
    904   DoReplaceMatchesAfterOffset(str, start_offset,
    905                               SubstringMatcher<string16>{find_this},
    906                               replace_with, ReplaceType::REPLACE_ALL);
    907 }
    908 
    909 void ReplaceSubstringsAfterOffset(std::string* str,
    910                                   size_t start_offset,
    911                                   StringPiece find_this,
    912                                   StringPiece replace_with) {
    913   DoReplaceMatchesAfterOffset(str, start_offset,
    914                               SubstringMatcher<std::string>{find_this},
    915                               replace_with, ReplaceType::REPLACE_ALL);
    916 }
    917 
    918 template <class string_type>
    919 inline typename string_type::value_type* WriteIntoT(string_type* str,
    920                                                     size_t length_with_null) {
    921   DCHECK_GT(length_with_null, 1u);
    922   str->reserve(length_with_null);
    923   str->resize(length_with_null - 1);
    924   return &((*str)[0]);
    925 }
    926 
    927 char* WriteInto(std::string* str, size_t length_with_null) {
    928   return WriteIntoT(str, length_with_null);
    929 }
    930 
    931 char16* WriteInto(string16* str, size_t length_with_null) {
    932   return WriteIntoT(str, length_with_null);
    933 }
    934 
    935 #if defined(_MSC_VER) && !defined(__clang__)
    936 // Work around VC++ code-gen bug. https://crbug.com/804884
    937 #pragma optimize("", off)
    938 #endif
    939 
    940 // Generic version for all JoinString overloads. |list_type| must be a sequence
    941 // (std::vector or std::initializer_list) of strings/StringPieces (std::string,
    942 // string16, StringPiece or StringPiece16). |string_type| is either std::string
    943 // or string16.
    944 template <typename list_type, typename string_type>
    945 static string_type JoinStringT(const list_type& parts,
    946                                BasicStringPiece<string_type> sep) {
    947   if (parts.size() == 0)
    948     return string_type();
    949 
    950   // Pre-allocate the eventual size of the string. Start with the size of all of
    951   // the separators (note that this *assumes* parts.size() > 0).
    952   size_t total_size = (parts.size() - 1) * sep.size();
    953   for (const auto& part : parts)
    954     total_size += part.size();
    955   string_type result;
    956   result.reserve(total_size);
    957 
    958   auto iter = parts.begin();
    959   DCHECK(iter != parts.end());
    960   AppendToString(&result, *iter);
    961   ++iter;
    962 
    963   for (; iter != parts.end(); ++iter) {
    964     sep.AppendToString(&result);
    965     // Using the overloaded AppendToString allows this template function to work
    966     // on both strings and StringPieces without creating an intermediate
    967     // StringPiece object.
    968     AppendToString(&result, *iter);
    969   }
    970 
    971   // Sanity-check that we pre-allocated correctly.
    972   DCHECK_EQ(total_size, result.size());
    973 
    974   return result;
    975 }
    976 
    977 std::string JoinString(const std::vector<std::string>& parts,
    978                        StringPiece separator) {
    979   return JoinStringT(parts, separator);
    980 }
    981 
    982 string16 JoinString(const std::vector<string16>& parts,
    983                     StringPiece16 separator) {
    984   return JoinStringT(parts, separator);
    985 }
    986 
    987 #if defined(_MSC_VER) && !defined(__clang__)
    988 // Work around VC++ code-gen bug. https://crbug.com/804884
    989 #pragma optimize("", on)
    990 #endif
    991 
    992 std::string JoinString(const std::vector<StringPiece>& parts,
    993                        StringPiece separator) {
    994   return JoinStringT(parts, separator);
    995 }
    996 
    997 string16 JoinString(const std::vector<StringPiece16>& parts,
    998                     StringPiece16 separator) {
    999   return JoinStringT(parts, separator);
   1000 }
   1001 
   1002 std::string JoinString(std::initializer_list<StringPiece> parts,
   1003                        StringPiece separator) {
   1004   return JoinStringT(parts, separator);
   1005 }
   1006 
   1007 string16 JoinString(std::initializer_list<StringPiece16> parts,
   1008                     StringPiece16 separator) {
   1009   return JoinStringT(parts, separator);
   1010 }
   1011 
   1012 template<class FormatStringType, class OutStringType>
   1013 OutStringType DoReplaceStringPlaceholders(
   1014     const FormatStringType& format_string,
   1015     const std::vector<OutStringType>& subst,
   1016     std::vector<size_t>* offsets) {
   1017   size_t substitutions = subst.size();
   1018   DCHECK_LT(substitutions, 10U);
   1019 
   1020   size_t sub_length = 0;
   1021   for (const auto& cur : subst)
   1022     sub_length += cur.length();
   1023 
   1024   OutStringType formatted;
   1025   formatted.reserve(format_string.length() + sub_length);
   1026 
   1027   std::vector<ReplacementOffset> r_offsets;
   1028   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
   1029     if ('$' == *i) {
   1030       if (i + 1 != format_string.end()) {
   1031         ++i;
   1032         if ('$' == *i) {
   1033           while (i != format_string.end() && '$' == *i) {
   1034             formatted.push_back('$');
   1035             ++i;
   1036           }
   1037           --i;
   1038         } else {
   1039           if (*i < '1' || *i > '9') {
   1040             DLOG(ERROR) << "Invalid placeholder: $" << *i;
   1041             continue;
   1042           }
   1043           uintptr_t index = *i - '1';
   1044           if (offsets) {
   1045             ReplacementOffset r_offset(index,
   1046                                        static_cast<int>(formatted.size()));
   1047             r_offsets.insert(
   1048                 std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset,
   1049                                  &CompareParameter),
   1050                 r_offset);
   1051           }
   1052           if (index < substitutions)
   1053             formatted.append(subst.at(index));
   1054         }
   1055       }
   1056     } else {
   1057       formatted.push_back(*i);
   1058     }
   1059   }
   1060   if (offsets) {
   1061     for (const auto& cur : r_offsets)
   1062       offsets->push_back(cur.offset);
   1063   }
   1064   return formatted;
   1065 }
   1066 
   1067 string16 ReplaceStringPlaceholders(const string16& format_string,
   1068                                    const std::vector<string16>& subst,
   1069                                    std::vector<size_t>* offsets) {
   1070   return DoReplaceStringPlaceholders(format_string, subst, offsets);
   1071 }
   1072 
   1073 std::string ReplaceStringPlaceholders(StringPiece format_string,
   1074                                       const std::vector<std::string>& subst,
   1075                                       std::vector<size_t>* offsets) {
   1076   return DoReplaceStringPlaceholders(format_string, subst, offsets);
   1077 }
   1078 
   1079 string16 ReplaceStringPlaceholders(const string16& format_string,
   1080                                    const string16& a,
   1081                                    size_t* offset) {
   1082   std::vector<size_t> offsets;
   1083   std::vector<string16> subst;
   1084   subst.push_back(a);
   1085   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
   1086 
   1087   DCHECK_EQ(1U, offsets.size());
   1088   if (offset)
   1089     *offset = offsets[0];
   1090   return result;
   1091 }
   1092 
   1093 // The following code is compatible with the OpenBSD lcpy interface.  See:
   1094 //   http://www.gratisoft.us/todd/papers/strlcpy.html
   1095 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
   1096 
   1097 namespace {
   1098 
   1099 template <typename CHAR>
   1100 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
   1101   for (size_t i = 0; i < dst_size; ++i) {
   1102     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
   1103       return i;
   1104   }
   1105 
   1106   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
   1107   if (dst_size != 0)
   1108     dst[dst_size - 1] = 0;
   1109 
   1110   // Count the rest of the |src|, and return it's length in characters.
   1111   while (src[dst_size]) ++dst_size;
   1112   return dst_size;
   1113 }
   1114 
   1115 }  // namespace
   1116 
   1117 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
   1118   return lcpyT<char>(dst, src, dst_size);
   1119 }
   1120 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
   1121   return lcpyT<wchar_t>(dst, src, dst_size);
   1122 }
   1123 
   1124 }  // namespace base
   1125