Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdint.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 #include <time.h>
     16 #include <wchar.h>
     17 #include <wctype.h>
     18 
     19 #include <algorithm>
     20 #include <limits>
     21 #include <vector>
     22 
     23 #include "base/logging.h"
     24 #include "base/macros.h"
     25 #include "base/memory/singleton.h"
     26 #include "base/strings/utf_string_conversion_utils.h"
     27 #include "base/strings/utf_string_conversions.h"
     28 #include "base/third_party/icu/icu_utf.h"
     29 #include "build/build_config.h"
     30 
     31 namespace base {
     32 
     33 namespace {
     34 
     35 // Force the singleton used by EmptyString[16] to be a unique type. This
     36 // prevents other code that might accidentally use Singleton<string> from
     37 // getting our internal one.
     38 struct EmptyStrings {
     39   EmptyStrings() {}
     40   const std::string s;
     41   const string16 s16;
     42 
     43   static EmptyStrings* GetInstance() {
     44     return Singleton<EmptyStrings>::get();
     45   }
     46 };
     47 
     48 // Used by ReplaceStringPlaceholders to track the position in the string of
     49 // replaced parameters.
     50 struct ReplacementOffset {
     51   ReplacementOffset(uintptr_t parameter, size_t offset)
     52       : parameter(parameter),
     53         offset(offset) {}
     54 
     55   // Index of the parameter.
     56   uintptr_t parameter;
     57 
     58   // Starting position in the string.
     59   size_t offset;
     60 };
     61 
     62 static bool CompareParameter(const ReplacementOffset& elem1,
     63                              const ReplacementOffset& elem2) {
     64   return elem1.parameter < elem2.parameter;
     65 }
     66 
     67 // Assuming that a pointer is the size of a "machine word", then
     68 // uintptr_t is an integer type that is also a machine word.
     69 typedef uintptr_t MachineWord;
     70 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
     71 
     72 inline bool IsAlignedToMachineWord(const void* pointer) {
     73   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
     74 }
     75 
     76 template<typename T> inline T* AlignToMachineWord(T* pointer) {
     77   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
     78                               ~kMachineWordAlignmentMask);
     79 }
     80 
     81 template<size_t size, typename CharacterType> struct NonASCIIMask;
     82 template<> struct NonASCIIMask<4, char16> {
     83     static inline uint32_t value() { return 0xFF80FF80U; }
     84 };
     85 template<> struct NonASCIIMask<4, char> {
     86     static inline uint32_t value() { return 0x80808080U; }
     87 };
     88 template<> struct NonASCIIMask<8, char16> {
     89     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
     90 };
     91 template<> struct NonASCIIMask<8, char> {
     92     static inline uint64_t value() { return 0x8080808080808080ULL; }
     93 };
     94 #if defined(WCHAR_T_IS_UTF32)
     95 template<> struct NonASCIIMask<4, wchar_t> {
     96     static inline uint32_t value() { return 0xFFFFFF80U; }
     97 };
     98 template<> struct NonASCIIMask<8, wchar_t> {
     99     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
    100 };
    101 #endif  // WCHAR_T_IS_UTF32
    102 
    103 }  // namespace
    104 
    105 bool IsWprintfFormatPortable(const wchar_t* format) {
    106   for (const wchar_t* position = format; *position != '\0'; ++position) {
    107     if (*position == '%') {
    108       bool in_specification = true;
    109       bool modifier_l = false;
    110       while (in_specification) {
    111         // Eat up characters until reaching a known specifier.
    112         if (*++position == '\0') {
    113           // The format string ended in the middle of a specification.  Call
    114           // it portable because no unportable specifications were found.  The
    115           // string is equally broken on all platforms.
    116           return true;
    117         }
    118 
    119         if (*position == 'l') {
    120           // 'l' is the only thing that can save the 's' and 'c' specifiers.
    121           modifier_l = true;
    122         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
    123                    *position == 'S' || *position == 'C' || *position == 'F' ||
    124                    *position == 'D' || *position == 'O' || *position == 'U') {
    125           // Not portable.
    126           return false;
    127         }
    128 
    129         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
    130           // Portable, keep scanning the rest of the format string.
    131           in_specification = false;
    132         }
    133       }
    134     }
    135   }
    136 
    137   return true;
    138 }
    139 
    140 namespace {
    141 
    142 template<typename StringType>
    143 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
    144   StringType ret;
    145   ret.reserve(str.size());
    146   for (size_t i = 0; i < str.size(); i++)
    147     ret.push_back(ToLowerASCII(str[i]));
    148   return ret;
    149 }
    150 
    151 template<typename StringType>
    152 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
    153   StringType ret;
    154   ret.reserve(str.size());
    155   for (size_t i = 0; i < str.size(); i++)
    156     ret.push_back(ToUpperASCII(str[i]));
    157   return ret;
    158 }
    159 
    160 }  // namespace
    161 
    162 std::string ToLowerASCII(StringPiece str) {
    163   return ToLowerASCIIImpl<std::string>(str);
    164 }
    165 
    166 string16 ToLowerASCII(StringPiece16 str) {
    167   return ToLowerASCIIImpl<string16>(str);
    168 }
    169 
    170 std::string ToUpperASCII(StringPiece str) {
    171   return ToUpperASCIIImpl<std::string>(str);
    172 }
    173 
    174 string16 ToUpperASCII(StringPiece16 str) {
    175   return ToUpperASCIIImpl<string16>(str);
    176 }
    177 
    178 template<class StringType>
    179 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
    180                                  BasicStringPiece<StringType> b) {
    181   // Find the first characters that aren't equal and compare them.  If the end
    182   // of one of the strings is found before a nonequal character, the lengths
    183   // of the strings are compared.
    184   size_t i = 0;
    185   while (i < a.length() && i < b.length()) {
    186     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
    187     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
    188     if (lower_a < lower_b)
    189       return -1;
    190     if (lower_a > lower_b)
    191       return 1;
    192     i++;
    193   }
    194 
    195   // End of one string hit before finding a different character. Expect the
    196   // common case to be "strings equal" at this point so check that first.
    197   if (a.length() == b.length())
    198     return 0;
    199 
    200   if (a.length() < b.length())
    201     return -1;
    202   return 1;
    203 }
    204 
    205 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    206   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
    207 }
    208 
    209 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    210   return CompareCaseInsensitiveASCIIT<string16>(a, b);
    211 }
    212 
    213 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    214   if (a.length() != b.length())
    215     return false;
    216   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
    217 }
    218 
    219 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    220   if (a.length() != b.length())
    221     return false;
    222   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
    223 }
    224 
    225 const std::string& EmptyString() {
    226   return EmptyStrings::GetInstance()->s;
    227 }
    228 
    229 const string16& EmptyString16() {
    230   return EmptyStrings::GetInstance()->s16;
    231 }
    232 
    233 template<typename STR>
    234 bool ReplaceCharsT(const STR& input,
    235                    const STR& replace_chars,
    236                    const STR& replace_with,
    237                    STR* output) {
    238   bool removed = false;
    239   size_t replace_length = replace_with.length();
    240 
    241   *output = input;
    242 
    243   size_t found = output->find_first_of(replace_chars);
    244   while (found != STR::npos) {
    245     removed = true;
    246     output->replace(found, 1, replace_with);
    247     found = output->find_first_of(replace_chars, found + replace_length);
    248   }
    249 
    250   return removed;
    251 }
    252 
    253 bool ReplaceChars(const string16& input,
    254                   const StringPiece16& replace_chars,
    255                   const string16& replace_with,
    256                   string16* output) {
    257   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    258 }
    259 
    260 bool ReplaceChars(const std::string& input,
    261                   const StringPiece& replace_chars,
    262                   const std::string& replace_with,
    263                   std::string* output) {
    264   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    265 }
    266 
    267 bool RemoveChars(const string16& input,
    268                  const StringPiece16& remove_chars,
    269                  string16* output) {
    270   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
    271 }
    272 
    273 bool RemoveChars(const std::string& input,
    274                  const StringPiece& remove_chars,
    275                  std::string* output) {
    276   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
    277 }
    278 
    279 template<typename Str>
    280 TrimPositions TrimStringT(const Str& input,
    281                           BasicStringPiece<Str> trim_chars,
    282                           TrimPositions positions,
    283                           Str* output) {
    284   // Find the edges of leading/trailing whitespace as desired. Need to use
    285   // a StringPiece version of input to be able to call find* on it with the
    286   // StringPiece version of trim_chars (normally the trim_chars will be a
    287   // constant so avoid making a copy).
    288   BasicStringPiece<Str> input_piece(input);
    289   const size_t last_char = input.length() - 1;
    290   const size_t first_good_char = (positions & TRIM_LEADING) ?
    291       input_piece.find_first_not_of(trim_chars) : 0;
    292   const size_t last_good_char = (positions & TRIM_TRAILING) ?
    293       input_piece.find_last_not_of(trim_chars) : last_char;
    294 
    295   // When the string was all trimmed, report that we stripped off characters
    296   // from whichever position the caller was interested in. For empty input, we
    297   // stripped no characters, but we still need to clear |output|.
    298   if (input.empty() ||
    299       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
    300     bool input_was_empty = input.empty();  // in case output == &input
    301     output->clear();
    302     return input_was_empty ? TRIM_NONE : positions;
    303   }
    304 
    305   // Trim.
    306   *output =
    307       input.substr(first_good_char, last_good_char - first_good_char + 1);
    308 
    309   // Return where we trimmed from.
    310   return static_cast<TrimPositions>(
    311       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    312       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    313 }
    314 
    315 bool TrimString(const string16& input,
    316                 StringPiece16 trim_chars,
    317                 string16* output) {
    318   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    319 }
    320 
    321 bool TrimString(const std::string& input,
    322                 StringPiece trim_chars,
    323                 std::string* output) {
    324   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    325 }
    326 
    327 template<typename Str>
    328 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
    329                                        BasicStringPiece<Str> trim_chars,
    330                                        TrimPositions positions) {
    331   size_t begin = (positions & TRIM_LEADING) ?
    332       input.find_first_not_of(trim_chars) : 0;
    333   size_t end = (positions & TRIM_TRAILING) ?
    334       input.find_last_not_of(trim_chars) + 1 : input.size();
    335   return input.substr(begin, end - begin);
    336 }
    337 
    338 StringPiece16 TrimString(StringPiece16 input,
    339                          const StringPiece16& trim_chars,
    340                          TrimPositions positions) {
    341   return TrimStringPieceT(input, trim_chars, positions);
    342 }
    343 
    344 StringPiece TrimString(StringPiece input,
    345                        const StringPiece& trim_chars,
    346                        TrimPositions positions) {
    347   return TrimStringPieceT(input, trim_chars, positions);
    348 }
    349 
    350 void TruncateUTF8ToByteSize(const std::string& input,
    351                             const size_t byte_size,
    352                             std::string* output) {
    353   DCHECK(output);
    354   if (byte_size > input.length()) {
    355     *output = input;
    356     return;
    357   }
    358   DCHECK_LE(byte_size,
    359             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
    360   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
    361   int32_t truncation_length = static_cast<int32_t>(byte_size);
    362   int32_t char_index = truncation_length - 1;
    363   const char* data = input.data();
    364 
    365   // Using CBU8, we will move backwards from the truncation point
    366   // to the beginning of the string looking for a valid UTF8
    367   // character.  Once a full UTF8 character is found, we will
    368   // truncate the string to the end of that character.
    369   while (char_index >= 0) {
    370     int32_t prev = char_index;
    371     base_icu::UChar32 code_point = 0;
    372     CBU8_NEXT(data, char_index, truncation_length, code_point);
    373     if (!IsValidCharacter(code_point) ||
    374         !IsValidCodepoint(code_point)) {
    375       char_index = prev - 1;
    376     } else {
    377       break;
    378     }
    379   }
    380 
    381   if (char_index >= 0 )
    382     *output = input.substr(0, char_index);
    383   else
    384     output->clear();
    385 }
    386 
    387 TrimPositions TrimWhitespace(const string16& input,
    388                              TrimPositions positions,
    389                              string16* output) {
    390   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
    391 }
    392 
    393 StringPiece16 TrimWhitespace(StringPiece16 input,
    394                              TrimPositions positions) {
    395   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
    396 }
    397 
    398 TrimPositions TrimWhitespaceASCII(const std::string& input,
    399                                   TrimPositions positions,
    400                                   std::string* output) {
    401   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
    402 }
    403 
    404 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
    405   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
    406 }
    407 
    408 template<typename STR>
    409 STR CollapseWhitespaceT(const STR& text,
    410                         bool trim_sequences_with_line_breaks) {
    411   STR result;
    412   result.resize(text.size());
    413 
    414   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    415   // will trim any leading whitespace.
    416   bool in_whitespace = true;
    417   bool already_trimmed = true;
    418 
    419   int chars_written = 0;
    420   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    421     if (IsUnicodeWhitespace(*i)) {
    422       if (!in_whitespace) {
    423         // Reduce all whitespace sequences to a single space.
    424         in_whitespace = true;
    425         result[chars_written++] = L' ';
    426       }
    427       if (trim_sequences_with_line_breaks && !already_trimmed &&
    428           ((*i == '\n') || (*i == '\r'))) {
    429         // Whitespace sequences containing CR or LF are eliminated entirely.
    430         already_trimmed = true;
    431         --chars_written;
    432       }
    433     } else {
    434       // Non-whitespace chracters are copied straight across.
    435       in_whitespace = false;
    436       already_trimmed = false;
    437       result[chars_written++] = *i;
    438     }
    439   }
    440 
    441   if (in_whitespace && !already_trimmed) {
    442     // Any trailing whitespace is eliminated.
    443     --chars_written;
    444   }
    445 
    446   result.resize(chars_written);
    447   return result;
    448 }
    449 
    450 string16 CollapseWhitespace(const string16& text,
    451                             bool trim_sequences_with_line_breaks) {
    452   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    453 }
    454 
    455 std::string CollapseWhitespaceASCII(const std::string& text,
    456                                     bool trim_sequences_with_line_breaks) {
    457   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    458 }
    459 
    460 bool ContainsOnlyChars(const StringPiece& input,
    461                        const StringPiece& characters) {
    462   return input.find_first_not_of(characters) == StringPiece::npos;
    463 }
    464 
    465 bool ContainsOnlyChars(const StringPiece16& input,
    466                        const StringPiece16& characters) {
    467   return input.find_first_not_of(characters) == StringPiece16::npos;
    468 }
    469 
    470 template <class Char>
    471 inline bool DoIsStringASCII(const Char* characters, size_t length) {
    472   MachineWord all_char_bits = 0;
    473   const Char* end = characters + length;
    474 
    475   // Prologue: align the input.
    476   while (!IsAlignedToMachineWord(characters) && characters != end) {
    477     all_char_bits |= *characters;
    478     ++characters;
    479   }
    480 
    481   // Compare the values of CPU word size.
    482   const Char* word_end = AlignToMachineWord(end);
    483   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
    484   while (characters < word_end) {
    485     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    486     characters += loop_increment;
    487   }
    488 
    489   // Process the remaining bytes.
    490   while (characters != end) {
    491     all_char_bits |= *characters;
    492     ++characters;
    493   }
    494 
    495   MachineWord non_ascii_bit_mask =
    496       NonASCIIMask<sizeof(MachineWord), Char>::value();
    497   return !(all_char_bits & non_ascii_bit_mask);
    498 }
    499 
    500 bool IsStringASCII(const StringPiece& str) {
    501   return DoIsStringASCII(str.data(), str.length());
    502 }
    503 
    504 bool IsStringASCII(const StringPiece16& str) {
    505   return DoIsStringASCII(str.data(), str.length());
    506 }
    507 
    508 bool IsStringASCII(const string16& str) {
    509   return DoIsStringASCII(str.data(), str.length());
    510 }
    511 
    512 #if defined(WCHAR_T_IS_UTF32)
    513 bool IsStringASCII(const std::wstring& str) {
    514   return DoIsStringASCII(str.data(), str.length());
    515 }
    516 #endif
    517 
    518 bool IsStringUTF8(const StringPiece& str) {
    519   const char *src = str.data();
    520   int32_t src_len = static_cast<int32_t>(str.length());
    521   int32_t char_index = 0;
    522 
    523   while (char_index < src_len) {
    524     int32_t code_point;
    525     CBU8_NEXT(src, char_index, src_len, code_point);
    526     if (!IsValidCharacter(code_point))
    527       return false;
    528   }
    529   return true;
    530 }
    531 
    532 // Implementation note: Normally this function will be called with a hardcoded
    533 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
    534 // a C constant requires running strlen, so the result will be two passes
    535 // through the buffers, one to file the length of lowercase_ascii, and one to
    536 // compare each letter.
    537 //
    538 // This function could have taken a const char* to avoid this and only do one
    539 // pass through the string. But the strlen is faster than the case-insensitive
    540 // compares and lets us early-exit in the case that the strings are different
    541 // lengths (will often be the case for non-matches). So whether one approach or
    542 // the other will be faster depends on the case.
    543 //
    544 // The hardcoded strings are typically very short so it doesn't matter, and the
    545 // string piece gives additional flexibility for the caller (doesn't have to be
    546 // null terminated) so we choose the StringPiece route.
    547 template<typename Str>
    548 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
    549                                           StringPiece lowercase_ascii) {
    550   if (str.size() != lowercase_ascii.size())
    551     return false;
    552   for (size_t i = 0; i < str.size(); i++) {
    553     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
    554       return false;
    555   }
    556   return true;
    557 }
    558 
    559 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
    560   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
    561 }
    562 
    563 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
    564   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
    565 }
    566 
    567 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
    568   if (str.length() != ascii.length())
    569     return false;
    570   return std::equal(ascii.begin(), ascii.end(), str.begin());
    571 }
    572 
    573 template<typename Str>
    574 bool StartsWithT(BasicStringPiece<Str> str,
    575                  BasicStringPiece<Str> search_for,
    576                  CompareCase case_sensitivity) {
    577   if (search_for.size() > str.size())
    578     return false;
    579 
    580   BasicStringPiece<Str> source = str.substr(0, search_for.size());
    581 
    582   switch (case_sensitivity) {
    583     case CompareCase::SENSITIVE:
    584       return source == search_for;
    585 
    586     case CompareCase::INSENSITIVE_ASCII:
    587       return std::equal(
    588           search_for.begin(), search_for.end(),
    589           source.begin(),
    590           CaseInsensitiveCompareASCII<typename Str::value_type>());
    591 
    592     default:
    593       NOTREACHED();
    594       return false;
    595   }
    596 }
    597 
    598 bool StartsWith(StringPiece str,
    599                 StringPiece search_for,
    600                 CompareCase case_sensitivity) {
    601   return StartsWithT<std::string>(str, search_for, case_sensitivity);
    602 }
    603 
    604 bool StartsWith(StringPiece16 str,
    605                 StringPiece16 search_for,
    606                 CompareCase case_sensitivity) {
    607   return StartsWithT<string16>(str, search_for, case_sensitivity);
    608 }
    609 
    610 template <typename Str>
    611 bool EndsWithT(BasicStringPiece<Str> str,
    612                BasicStringPiece<Str> search_for,
    613                CompareCase case_sensitivity) {
    614   if (search_for.size() > str.size())
    615     return false;
    616 
    617   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
    618                                             search_for.size());
    619 
    620   switch (case_sensitivity) {
    621     case CompareCase::SENSITIVE:
    622       return source == search_for;
    623 
    624     case CompareCase::INSENSITIVE_ASCII:
    625       return std::equal(
    626           source.begin(), source.end(),
    627           search_for.begin(),
    628           CaseInsensitiveCompareASCII<typename Str::value_type>());
    629 
    630     default:
    631       NOTREACHED();
    632       return false;
    633   }
    634 }
    635 
    636 bool EndsWith(StringPiece str,
    637               StringPiece search_for,
    638               CompareCase case_sensitivity) {
    639   return EndsWithT<std::string>(str, search_for, case_sensitivity);
    640 }
    641 
    642 bool EndsWith(StringPiece16 str,
    643               StringPiece16 search_for,
    644               CompareCase case_sensitivity) {
    645   return EndsWithT<string16>(str, search_for, case_sensitivity);
    646 }
    647 
    648 char HexDigitToInt(wchar_t c) {
    649   DCHECK(IsHexDigit(c));
    650   if (c >= '0' && c <= '9')
    651     return static_cast<char>(c - '0');
    652   if (c >= 'A' && c <= 'F')
    653     return static_cast<char>(c - 'A' + 10);
    654   if (c >= 'a' && c <= 'f')
    655     return static_cast<char>(c - 'a' + 10);
    656   return 0;
    657 }
    658 
    659 bool IsUnicodeWhitespace(wchar_t c) {
    660   // kWhitespaceWide is a NULL-terminated string
    661   for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
    662     if (*cur == c)
    663       return true;
    664   }
    665   return false;
    666 }
    667 
    668 static const char* const kByteStringsUnlocalized[] = {
    669   " B",
    670   " kB",
    671   " MB",
    672   " GB",
    673   " TB",
    674   " PB"
    675 };
    676 
    677 string16 FormatBytesUnlocalized(int64_t bytes) {
    678   double unit_amount = static_cast<double>(bytes);
    679   size_t dimension = 0;
    680   const int kKilo = 1024;
    681   while (unit_amount >= kKilo &&
    682          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    683     unit_amount /= kKilo;
    684     dimension++;
    685   }
    686 
    687   char buf[64];
    688   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    689     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    690                    kByteStringsUnlocalized[dimension]);
    691   } else {
    692     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    693                    kByteStringsUnlocalized[dimension]);
    694   }
    695 
    696   return ASCIIToUTF16(buf);
    697 }
    698 
    699 // Runs in O(n) time in the length of |str|.
    700 template<class StringType>
    701 void DoReplaceSubstringsAfterOffset(StringType* str,
    702                                     size_t offset,
    703                                     BasicStringPiece<StringType> find_this,
    704                                     BasicStringPiece<StringType> replace_with,
    705                                     bool replace_all) {
    706   DCHECK(!find_this.empty());
    707 
    708   // If the find string doesn't appear, there's nothing to do.
    709   offset = str->find(find_this.data(), offset, find_this.size());
    710   if (offset == StringType::npos)
    711     return;
    712 
    713   // If we're only replacing one instance, there's no need to do anything
    714   // complicated.
    715   size_t find_length = find_this.length();
    716   if (!replace_all) {
    717     str->replace(offset, find_length, replace_with.data(), replace_with.size());
    718     return;
    719   }
    720 
    721   // If the find and replace strings are the same length, we can simply use
    722   // replace() on each instance, and finish the entire operation in O(n) time.
    723   size_t replace_length = replace_with.length();
    724   if (find_length == replace_length) {
    725     do {
    726       str->replace(offset, find_length,
    727                    replace_with.data(), replace_with.size());
    728       offset = str->find(find_this.data(), offset + replace_length,
    729                          find_this.size());
    730     } while (offset != StringType::npos);
    731     return;
    732   }
    733 
    734   // Since the find and replace strings aren't the same length, a loop like the
    735   // one above would be O(n^2) in the worst case, as replace() will shift the
    736   // entire remaining string each time.  We need to be more clever to keep
    737   // things O(n).
    738   //
    739   // If we're shortening the string, we can alternate replacements with shifting
    740   // forward the intervening characters using memmove().
    741   size_t str_length = str->length();
    742   if (find_length > replace_length) {
    743     size_t write_offset = offset;
    744     do {
    745       if (replace_length) {
    746         str->replace(write_offset, replace_length,
    747                      replace_with.data(), replace_with.size());
    748         write_offset += replace_length;
    749       }
    750       size_t read_offset = offset + find_length;
    751       offset = std::min(
    752           str->find(find_this.data(), read_offset, find_this.size()),
    753           str_length);
    754       size_t length = offset - read_offset;
    755       if (length) {
    756         memmove(&(*str)[write_offset], &(*str)[read_offset],
    757                 length * sizeof(typename StringType::value_type));
    758         write_offset += length;
    759       }
    760     } while (offset < str_length);
    761     str->resize(write_offset);
    762     return;
    763   }
    764 
    765   // We're lengthening the string.  We can use alternating replacements and
    766   // memmove() calls like above, but we need to precalculate the final string
    767   // length and then expand from back-to-front to avoid overwriting the string
    768   // as we're reading it, needing to shift, or having to copy to a second string
    769   // temporarily.
    770   size_t first_match = offset;
    771 
    772   // First, calculate the final length and resize the string.
    773   size_t final_length = str_length;
    774   size_t expansion = replace_length - find_length;
    775   size_t current_match;
    776   do {
    777     final_length += expansion;
    778     // Minor optimization: save this offset into |current_match|, so that on
    779     // exit from the loop, |current_match| will point at the last instance of
    780     // the find string, and we won't need to find() it again immediately.
    781     current_match = offset;
    782     offset = str->find(find_this.data(), offset + find_length,
    783                        find_this.size());
    784   } while (offset != StringType::npos);
    785   str->resize(final_length);
    786 
    787   // Now do the replacement loop, working backwards through the string.
    788   for (size_t prev_match = str_length, write_offset = final_length; ;
    789        current_match = str->rfind(find_this.data(), current_match - 1,
    790                                   find_this.size())) {
    791     size_t read_offset = current_match + find_length;
    792     size_t length = prev_match - read_offset;
    793     if (length) {
    794       write_offset -= length;
    795       memmove(&(*str)[write_offset], &(*str)[read_offset],
    796               length * sizeof(typename StringType::value_type));
    797     }
    798     write_offset -= replace_length;
    799     str->replace(write_offset, replace_length,
    800                  replace_with.data(), replace_with.size());
    801     if (current_match == first_match)
    802       return;
    803     prev_match = current_match;
    804   }
    805 }
    806 
    807 void ReplaceFirstSubstringAfterOffset(string16* str,
    808                                       size_t start_offset,
    809                                       StringPiece16 find_this,
    810                                       StringPiece16 replace_with) {
    811   DoReplaceSubstringsAfterOffset<string16>(
    812       str, start_offset, find_this, replace_with, false);  // Replace first.
    813 }
    814 
    815 void ReplaceFirstSubstringAfterOffset(std::string* str,
    816                                       size_t start_offset,
    817                                       StringPiece find_this,
    818                                       StringPiece replace_with) {
    819   DoReplaceSubstringsAfterOffset<std::string>(
    820       str, start_offset, find_this, replace_with, false);  // Replace first.
    821 }
    822 
    823 void ReplaceSubstringsAfterOffset(string16* str,
    824                                   size_t start_offset,
    825                                   StringPiece16 find_this,
    826                                   StringPiece16 replace_with) {
    827   DoReplaceSubstringsAfterOffset<string16>(
    828       str, start_offset, find_this, replace_with, true);  // Replace all.
    829 }
    830 
    831 void ReplaceSubstringsAfterOffset(std::string* str,
    832                                   size_t start_offset,
    833                                   StringPiece find_this,
    834                                   StringPiece replace_with) {
    835   DoReplaceSubstringsAfterOffset<std::string>(
    836       str, start_offset, find_this, replace_with, true);  // Replace all.
    837 }
    838 
    839 template <class string_type>
    840 inline typename string_type::value_type* WriteIntoT(string_type* str,
    841                                                     size_t length_with_null) {
    842   DCHECK_GT(length_with_null, 1u);
    843   str->reserve(length_with_null);
    844   str->resize(length_with_null - 1);
    845   return &((*str)[0]);
    846 }
    847 
    848 char* WriteInto(std::string* str, size_t length_with_null) {
    849   return WriteIntoT(str, length_with_null);
    850 }
    851 
    852 char16* WriteInto(string16* str, size_t length_with_null) {
    853   return WriteIntoT(str, length_with_null);
    854 }
    855 
    856 template<typename STR>
    857 static STR JoinStringT(const std::vector<STR>& parts,
    858                        BasicStringPiece<STR> sep) {
    859   if (parts.empty())
    860     return STR();
    861 
    862   STR result(parts[0]);
    863   auto iter = parts.begin();
    864   ++iter;
    865 
    866   for (; iter != parts.end(); ++iter) {
    867     sep.AppendToString(&result);
    868     result += *iter;
    869   }
    870 
    871   return result;
    872 }
    873 
    874 std::string JoinString(const std::vector<std::string>& parts,
    875                        StringPiece separator) {
    876   return JoinStringT(parts, separator);
    877 }
    878 
    879 string16 JoinString(const std::vector<string16>& parts,
    880                     StringPiece16 separator) {
    881   return JoinStringT(parts, separator);
    882 }
    883 
    884 template<class FormatStringType, class OutStringType>
    885 OutStringType DoReplaceStringPlaceholders(
    886     const FormatStringType& format_string,
    887     const std::vector<OutStringType>& subst,
    888     std::vector<size_t>* offsets) {
    889   size_t substitutions = subst.size();
    890   DCHECK_LT(substitutions, 10U);
    891 
    892   size_t sub_length = 0;
    893   for (const auto& cur : subst)
    894     sub_length += cur.length();
    895 
    896   OutStringType formatted;
    897   formatted.reserve(format_string.length() + sub_length);
    898 
    899   std::vector<ReplacementOffset> r_offsets;
    900   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
    901     if ('$' == *i) {
    902       if (i + 1 != format_string.end()) {
    903         ++i;
    904         if ('$' == *i) {
    905           while (i != format_string.end() && '$' == *i) {
    906             formatted.push_back('$');
    907             ++i;
    908           }
    909           --i;
    910         } else {
    911           if (*i < '1' || *i > '9') {
    912             DLOG(ERROR) << "Invalid placeholder: $" << *i;
    913             continue;
    914           }
    915           uintptr_t index = *i - '1';
    916           if (offsets) {
    917             ReplacementOffset r_offset(index,
    918                 static_cast<int>(formatted.size()));
    919             r_offsets.insert(std::lower_bound(r_offsets.begin(),
    920                                               r_offsets.end(),
    921                                               r_offset,
    922                                               &CompareParameter),
    923                              r_offset);
    924           }
    925           if (index < substitutions)
    926             formatted.append(subst.at(index));
    927         }
    928       }
    929     } else {
    930       formatted.push_back(*i);
    931     }
    932   }
    933   if (offsets) {
    934     for (const auto& cur : r_offsets)
    935       offsets->push_back(cur.offset);
    936   }
    937   return formatted;
    938 }
    939 
    940 string16 ReplaceStringPlaceholders(const string16& format_string,
    941                                    const std::vector<string16>& subst,
    942                                    std::vector<size_t>* offsets) {
    943   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    944 }
    945 
    946 std::string ReplaceStringPlaceholders(const StringPiece& format_string,
    947                                       const std::vector<std::string>& subst,
    948                                       std::vector<size_t>* offsets) {
    949   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    950 }
    951 
    952 string16 ReplaceStringPlaceholders(const string16& format_string,
    953                                    const string16& a,
    954                                    size_t* offset) {
    955   std::vector<size_t> offsets;
    956   std::vector<string16> subst;
    957   subst.push_back(a);
    958   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
    959 
    960   DCHECK_EQ(1U, offsets.size());
    961   if (offset)
    962     *offset = offsets[0];
    963   return result;
    964 }
    965 
    966 // The following code is compatible with the OpenBSD lcpy interface.  See:
    967 //   http://www.gratisoft.us/todd/papers/strlcpy.html
    968 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
    969 
    970 namespace {
    971 
    972 template <typename CHAR>
    973 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
    974   for (size_t i = 0; i < dst_size; ++i) {
    975     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
    976       return i;
    977   }
    978 
    979   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
    980   if (dst_size != 0)
    981     dst[dst_size - 1] = 0;
    982 
    983   // Count the rest of the |src|, and return it's length in characters.
    984   while (src[dst_size]) ++dst_size;
    985   return dst_size;
    986 }
    987 
    988 }  // namespace
    989 
    990 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
    991   return lcpyT<char>(dst, src, dst_size);
    992 }
    993 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
    994   return lcpyT<wchar_t>(dst, src, dst_size);
    995 }
    996 
    997 }  // namespace base
    998