Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdint.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 #include <time.h>
     16 #include <wchar.h>
     17 #include <wctype.h>
     18 
     19 #include <algorithm>
     20 #include <limits>
     21 #include <vector>
     22 
     23 #include "base/logging.h"
     24 #include "base/macros.h"
     25 #include "base/memory/singleton.h"
     26 #include "base/strings/string_split.h"
     27 #include "base/strings/utf_string_conversion_utils.h"
     28 #include "base/strings/utf_string_conversions.h"
     29 #include "base/third_party/icu/icu_utf.h"
     30 #include "build/build_config.h"
     31 
     32 namespace base {
     33 
     34 namespace {
     35 
     36 // Force the singleton used by EmptyString[16] to be a unique type. This
     37 // prevents other code that might accidentally use Singleton<string> from
     38 // getting our internal one.
     39 struct EmptyStrings {
     40   EmptyStrings() {}
     41   const std::string s;
     42   const string16 s16;
     43 
     44   static EmptyStrings* GetInstance() {
     45     return Singleton<EmptyStrings>::get();
     46   }
     47 };
     48 
     49 // Used by ReplaceStringPlaceholders to track the position in the string of
     50 // replaced parameters.
     51 struct ReplacementOffset {
     52   ReplacementOffset(uintptr_t parameter, size_t offset)
     53       : parameter(parameter),
     54         offset(offset) {}
     55 
     56   // Index of the parameter.
     57   uintptr_t parameter;
     58 
     59   // Starting position in the string.
     60   size_t offset;
     61 };
     62 
     63 static bool CompareParameter(const ReplacementOffset& elem1,
     64                              const ReplacementOffset& elem2) {
     65   return elem1.parameter < elem2.parameter;
     66 }
     67 
     68 // Assuming that a pointer is the size of a "machine word", then
     69 // uintptr_t is an integer type that is also a machine word.
     70 typedef uintptr_t MachineWord;
     71 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
     72 
     73 inline bool IsAlignedToMachineWord(const void* pointer) {
     74   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
     75 }
     76 
     77 template<typename T> inline T* AlignToMachineWord(T* pointer) {
     78   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
     79                               ~kMachineWordAlignmentMask);
     80 }
     81 
     82 template<size_t size, typename CharacterType> struct NonASCIIMask;
     83 template<> struct NonASCIIMask<4, char16> {
     84     static inline uint32_t value() { return 0xFF80FF80U; }
     85 };
     86 template<> struct NonASCIIMask<4, char> {
     87     static inline uint32_t value() { return 0x80808080U; }
     88 };
     89 template<> struct NonASCIIMask<8, char16> {
     90     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
     91 };
     92 template<> struct NonASCIIMask<8, char> {
     93     static inline uint64_t value() { return 0x8080808080808080ULL; }
     94 };
     95 #if defined(WCHAR_T_IS_UTF32)
     96 template<> struct NonASCIIMask<4, wchar_t> {
     97     static inline uint32_t value() { return 0xFFFFFF80U; }
     98 };
     99 template<> struct NonASCIIMask<8, wchar_t> {
    100     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
    101 };
    102 #endif  // WCHAR_T_IS_UTF32
    103 
    104 }  // namespace
    105 
    106 bool IsWprintfFormatPortable(const wchar_t* format) {
    107   for (const wchar_t* position = format; *position != '\0'; ++position) {
    108     if (*position == '%') {
    109       bool in_specification = true;
    110       bool modifier_l = false;
    111       while (in_specification) {
    112         // Eat up characters until reaching a known specifier.
    113         if (*++position == '\0') {
    114           // The format string ended in the middle of a specification.  Call
    115           // it portable because no unportable specifications were found.  The
    116           // string is equally broken on all platforms.
    117           return true;
    118         }
    119 
    120         if (*position == 'l') {
    121           // 'l' is the only thing that can save the 's' and 'c' specifiers.
    122           modifier_l = true;
    123         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
    124                    *position == 'S' || *position == 'C' || *position == 'F' ||
    125                    *position == 'D' || *position == 'O' || *position == 'U') {
    126           // Not portable.
    127           return false;
    128         }
    129 
    130         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
    131           // Portable, keep scanning the rest of the format string.
    132           in_specification = false;
    133         }
    134       }
    135     }
    136   }
    137 
    138   return true;
    139 }
    140 
    141 namespace {
    142 
    143 template<typename StringType>
    144 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
    145   StringType ret;
    146   ret.reserve(str.size());
    147   for (size_t i = 0; i < str.size(); i++)
    148     ret.push_back(ToLowerASCII(str[i]));
    149   return ret;
    150 }
    151 
    152 template<typename StringType>
    153 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
    154   StringType ret;
    155   ret.reserve(str.size());
    156   for (size_t i = 0; i < str.size(); i++)
    157     ret.push_back(ToUpperASCII(str[i]));
    158   return ret;
    159 }
    160 
    161 }  // namespace
    162 
    163 std::string ToLowerASCII(StringPiece str) {
    164   return ToLowerASCIIImpl<std::string>(str);
    165 }
    166 
    167 string16 ToLowerASCII(StringPiece16 str) {
    168   return ToLowerASCIIImpl<string16>(str);
    169 }
    170 
    171 std::string ToUpperASCII(StringPiece str) {
    172   return ToUpperASCIIImpl<std::string>(str);
    173 }
    174 
    175 string16 ToUpperASCII(StringPiece16 str) {
    176   return ToUpperASCIIImpl<string16>(str);
    177 }
    178 
    179 template<class StringType>
    180 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
    181                                  BasicStringPiece<StringType> b) {
    182   // Find the first characters that aren't equal and compare them.  If the end
    183   // of one of the strings is found before a nonequal character, the lengths
    184   // of the strings are compared.
    185   size_t i = 0;
    186   while (i < a.length() && i < b.length()) {
    187     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
    188     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
    189     if (lower_a < lower_b)
    190       return -1;
    191     if (lower_a > lower_b)
    192       return 1;
    193     i++;
    194   }
    195 
    196   // End of one string hit before finding a different character. Expect the
    197   // common case to be "strings equal" at this point so check that first.
    198   if (a.length() == b.length())
    199     return 0;
    200 
    201   if (a.length() < b.length())
    202     return -1;
    203   return 1;
    204 }
    205 
    206 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    207   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
    208 }
    209 
    210 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    211   return CompareCaseInsensitiveASCIIT<string16>(a, b);
    212 }
    213 
    214 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    215   if (a.length() != b.length())
    216     return false;
    217   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
    218 }
    219 
    220 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    221   if (a.length() != b.length())
    222     return false;
    223   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
    224 }
    225 
    226 const std::string& EmptyString() {
    227   return EmptyStrings::GetInstance()->s;
    228 }
    229 
    230 const string16& EmptyString16() {
    231   return EmptyStrings::GetInstance()->s16;
    232 }
    233 
    234 template<typename STR>
    235 bool ReplaceCharsT(const STR& input,
    236                    const STR& replace_chars,
    237                    const STR& replace_with,
    238                    STR* output) {
    239   bool removed = false;
    240   size_t replace_length = replace_with.length();
    241 
    242   *output = input;
    243 
    244   size_t found = output->find_first_of(replace_chars);
    245   while (found != STR::npos) {
    246     removed = true;
    247     output->replace(found, 1, replace_with);
    248     found = output->find_first_of(replace_chars, found + replace_length);
    249   }
    250 
    251   return removed;
    252 }
    253 
    254 bool ReplaceChars(const string16& input,
    255                   const StringPiece16& replace_chars,
    256                   const string16& replace_with,
    257                   string16* output) {
    258   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    259 }
    260 
    261 bool ReplaceChars(const std::string& input,
    262                   const StringPiece& replace_chars,
    263                   const std::string& replace_with,
    264                   std::string* output) {
    265   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    266 }
    267 
    268 bool RemoveChars(const string16& input,
    269                  const StringPiece16& remove_chars,
    270                  string16* output) {
    271   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
    272 }
    273 
    274 bool RemoveChars(const std::string& input,
    275                  const StringPiece& remove_chars,
    276                  std::string* output) {
    277   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
    278 }
    279 
    280 template<typename Str>
    281 TrimPositions TrimStringT(const Str& input,
    282                           BasicStringPiece<Str> trim_chars,
    283                           TrimPositions positions,
    284                           Str* output) {
    285   // Find the edges of leading/trailing whitespace as desired. Need to use
    286   // a StringPiece version of input to be able to call find* on it with the
    287   // StringPiece version of trim_chars (normally the trim_chars will be a
    288   // constant so avoid making a copy).
    289   BasicStringPiece<Str> input_piece(input);
    290   const size_t last_char = input.length() - 1;
    291   const size_t first_good_char = (positions & TRIM_LEADING) ?
    292       input_piece.find_first_not_of(trim_chars) : 0;
    293   const size_t last_good_char = (positions & TRIM_TRAILING) ?
    294       input_piece.find_last_not_of(trim_chars) : last_char;
    295 
    296   // When the string was all trimmed, report that we stripped off characters
    297   // from whichever position the caller was interested in. For empty input, we
    298   // stripped no characters, but we still need to clear |output|.
    299   if (input.empty() ||
    300       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
    301     bool input_was_empty = input.empty();  // in case output == &input
    302     output->clear();
    303     return input_was_empty ? TRIM_NONE : positions;
    304   }
    305 
    306   // Trim.
    307   *output =
    308       input.substr(first_good_char, last_good_char - first_good_char + 1);
    309 
    310   // Return where we trimmed from.
    311   return static_cast<TrimPositions>(
    312       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    313       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    314 }
    315 
    316 bool TrimString(const string16& input,
    317                 StringPiece16 trim_chars,
    318                 string16* output) {
    319   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    320 }
    321 
    322 bool TrimString(const std::string& input,
    323                 StringPiece trim_chars,
    324                 std::string* output) {
    325   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    326 }
    327 
    328 template<typename Str>
    329 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
    330                                        BasicStringPiece<Str> trim_chars,
    331                                        TrimPositions positions) {
    332   size_t begin = (positions & TRIM_LEADING) ?
    333       input.find_first_not_of(trim_chars) : 0;
    334   size_t end = (positions & TRIM_TRAILING) ?
    335       input.find_last_not_of(trim_chars) + 1 : input.size();
    336   return input.substr(begin, end - begin);
    337 }
    338 
    339 StringPiece16 TrimString(StringPiece16 input,
    340                          const StringPiece16& trim_chars,
    341                          TrimPositions positions) {
    342   return TrimStringPieceT(input, trim_chars, positions);
    343 }
    344 
    345 StringPiece TrimString(StringPiece input,
    346                        const StringPiece& trim_chars,
    347                        TrimPositions positions) {
    348   return TrimStringPieceT(input, trim_chars, positions);
    349 }
    350 
    351 void TruncateUTF8ToByteSize(const std::string& input,
    352                             const size_t byte_size,
    353                             std::string* output) {
    354   DCHECK(output);
    355   if (byte_size > input.length()) {
    356     *output = input;
    357     return;
    358   }
    359   DCHECK_LE(byte_size,
    360             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
    361   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
    362   int32_t truncation_length = static_cast<int32_t>(byte_size);
    363   int32_t char_index = truncation_length - 1;
    364   const char* data = input.data();
    365 
    366   // Using CBU8, we will move backwards from the truncation point
    367   // to the beginning of the string looking for a valid UTF8
    368   // character.  Once a full UTF8 character is found, we will
    369   // truncate the string to the end of that character.
    370   while (char_index >= 0) {
    371     int32_t prev = char_index;
    372     base_icu::UChar32 code_point = 0;
    373     CBU8_NEXT(data, char_index, truncation_length, code_point);
    374     if (!IsValidCharacter(code_point) ||
    375         !IsValidCodepoint(code_point)) {
    376       char_index = prev - 1;
    377     } else {
    378       break;
    379     }
    380   }
    381 
    382   if (char_index >= 0 )
    383     *output = input.substr(0, char_index);
    384   else
    385     output->clear();
    386 }
    387 
    388 TrimPositions TrimWhitespace(const string16& input,
    389                              TrimPositions positions,
    390                              string16* output) {
    391   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
    392 }
    393 
    394 StringPiece16 TrimWhitespace(StringPiece16 input,
    395                              TrimPositions positions) {
    396   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
    397 }
    398 
    399 TrimPositions TrimWhitespaceASCII(const std::string& input,
    400                                   TrimPositions positions,
    401                                   std::string* output) {
    402   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
    403 }
    404 
    405 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
    406   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
    407 }
    408 
    409 template<typename STR>
    410 STR CollapseWhitespaceT(const STR& text,
    411                         bool trim_sequences_with_line_breaks) {
    412   STR result;
    413   result.resize(text.size());
    414 
    415   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    416   // will trim any leading whitespace.
    417   bool in_whitespace = true;
    418   bool already_trimmed = true;
    419 
    420   int chars_written = 0;
    421   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    422     if (IsUnicodeWhitespace(*i)) {
    423       if (!in_whitespace) {
    424         // Reduce all whitespace sequences to a single space.
    425         in_whitespace = true;
    426         result[chars_written++] = L' ';
    427       }
    428       if (trim_sequences_with_line_breaks && !already_trimmed &&
    429           ((*i == '\n') || (*i == '\r'))) {
    430         // Whitespace sequences containing CR or LF are eliminated entirely.
    431         already_trimmed = true;
    432         --chars_written;
    433       }
    434     } else {
    435       // Non-whitespace chracters are copied straight across.
    436       in_whitespace = false;
    437       already_trimmed = false;
    438       result[chars_written++] = *i;
    439     }
    440   }
    441 
    442   if (in_whitespace && !already_trimmed) {
    443     // Any trailing whitespace is eliminated.
    444     --chars_written;
    445   }
    446 
    447   result.resize(chars_written);
    448   return result;
    449 }
    450 
    451 string16 CollapseWhitespace(const string16& text,
    452                             bool trim_sequences_with_line_breaks) {
    453   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    454 }
    455 
    456 std::string CollapseWhitespaceASCII(const std::string& text,
    457                                     bool trim_sequences_with_line_breaks) {
    458   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    459 }
    460 
    461 bool ContainsOnlyChars(const StringPiece& input,
    462                        const StringPiece& characters) {
    463   return input.find_first_not_of(characters) == StringPiece::npos;
    464 }
    465 
    466 bool ContainsOnlyChars(const StringPiece16& input,
    467                        const StringPiece16& characters) {
    468   return input.find_first_not_of(characters) == StringPiece16::npos;
    469 }
    470 
    471 template <class Char>
    472 inline bool DoIsStringASCII(const Char* characters, size_t length) {
    473   MachineWord all_char_bits = 0;
    474   const Char* end = characters + length;
    475 
    476   // Prologue: align the input.
    477   while (!IsAlignedToMachineWord(characters) && characters != end) {
    478     all_char_bits |= *characters;
    479     ++characters;
    480   }
    481 
    482   // Compare the values of CPU word size.
    483   const Char* word_end = AlignToMachineWord(end);
    484   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
    485   while (characters < word_end) {
    486     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    487     characters += loop_increment;
    488   }
    489 
    490   // Process the remaining bytes.
    491   while (characters != end) {
    492     all_char_bits |= *characters;
    493     ++characters;
    494   }
    495 
    496   MachineWord non_ascii_bit_mask =
    497       NonASCIIMask<sizeof(MachineWord), Char>::value();
    498   return !(all_char_bits & non_ascii_bit_mask);
    499 }
    500 
    501 bool IsStringASCII(const StringPiece& str) {
    502   return DoIsStringASCII(str.data(), str.length());
    503 }
    504 
    505 bool IsStringASCII(const StringPiece16& str) {
    506   return DoIsStringASCII(str.data(), str.length());
    507 }
    508 
    509 bool IsStringASCII(const string16& str) {
    510   return DoIsStringASCII(str.data(), str.length());
    511 }
    512 
    513 #if defined(WCHAR_T_IS_UTF32)
    514 bool IsStringASCII(const std::wstring& str) {
    515   return DoIsStringASCII(str.data(), str.length());
    516 }
    517 #endif
    518 
    519 bool IsStringUTF8(const StringPiece& str) {
    520   const char *src = str.data();
    521   int32_t src_len = static_cast<int32_t>(str.length());
    522   int32_t char_index = 0;
    523 
    524   while (char_index < src_len) {
    525     int32_t code_point;
    526     CBU8_NEXT(src, char_index, src_len, code_point);
    527     if (!IsValidCharacter(code_point))
    528       return false;
    529   }
    530   return true;
    531 }
    532 
    533 // Implementation note: Normally this function will be called with a hardcoded
    534 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
    535 // a C constant requires running strlen, so the result will be two passes
    536 // through the buffers, one to file the length of lowercase_ascii, and one to
    537 // compare each letter.
    538 //
    539 // This function could have taken a const char* to avoid this and only do one
    540 // pass through the string. But the strlen is faster than the case-insensitive
    541 // compares and lets us early-exit in the case that the strings are different
    542 // lengths (will often be the case for non-matches). So whether one approach or
    543 // the other will be faster depends on the case.
    544 //
    545 // The hardcoded strings are typically very short so it doesn't matter, and the
    546 // string piece gives additional flexibility for the caller (doesn't have to be
    547 // null terminated) so we choose the StringPiece route.
    548 template<typename Str>
    549 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
    550                                           StringPiece lowercase_ascii) {
    551   if (str.size() != lowercase_ascii.size())
    552     return false;
    553   for (size_t i = 0; i < str.size(); i++) {
    554     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
    555       return false;
    556   }
    557   return true;
    558 }
    559 
    560 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
    561   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
    562 }
    563 
    564 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
    565   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
    566 }
    567 
    568 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
    569   if (str.length() != ascii.length())
    570     return false;
    571   return std::equal(ascii.begin(), ascii.end(), str.begin());
    572 }
    573 
    574 template<typename Str>
    575 bool StartsWithT(BasicStringPiece<Str> str,
    576                  BasicStringPiece<Str> search_for,
    577                  CompareCase case_sensitivity) {
    578   if (search_for.size() > str.size())
    579     return false;
    580 
    581   BasicStringPiece<Str> source = str.substr(0, search_for.size());
    582 
    583   switch (case_sensitivity) {
    584     case CompareCase::SENSITIVE:
    585       return source == search_for;
    586 
    587     case CompareCase::INSENSITIVE_ASCII:
    588       return std::equal(
    589           search_for.begin(), search_for.end(),
    590           source.begin(),
    591           CaseInsensitiveCompareASCII<typename Str::value_type>());
    592 
    593     default:
    594       NOTREACHED();
    595       return false;
    596   }
    597 }
    598 
    599 bool StartsWith(StringPiece str,
    600                 StringPiece search_for,
    601                 CompareCase case_sensitivity) {
    602   return StartsWithT<std::string>(str, search_for, case_sensitivity);
    603 }
    604 
    605 bool StartsWith(StringPiece16 str,
    606                 StringPiece16 search_for,
    607                 CompareCase case_sensitivity) {
    608   return StartsWithT<string16>(str, search_for, case_sensitivity);
    609 }
    610 
    611 template <typename Str>
    612 bool EndsWithT(BasicStringPiece<Str> str,
    613                BasicStringPiece<Str> search_for,
    614                CompareCase case_sensitivity) {
    615   if (search_for.size() > str.size())
    616     return false;
    617 
    618   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
    619                                             search_for.size());
    620 
    621   switch (case_sensitivity) {
    622     case CompareCase::SENSITIVE:
    623       return source == search_for;
    624 
    625     case CompareCase::INSENSITIVE_ASCII:
    626       return std::equal(
    627           source.begin(), source.end(),
    628           search_for.begin(),
    629           CaseInsensitiveCompareASCII<typename Str::value_type>());
    630 
    631     default:
    632       NOTREACHED();
    633       return false;
    634   }
    635 }
    636 
    637 bool EndsWith(StringPiece str,
    638               StringPiece search_for,
    639               CompareCase case_sensitivity) {
    640   return EndsWithT<std::string>(str, search_for, case_sensitivity);
    641 }
    642 
    643 bool EndsWith(StringPiece16 str,
    644               StringPiece16 search_for,
    645               CompareCase case_sensitivity) {
    646   return EndsWithT<string16>(str, search_for, case_sensitivity);
    647 }
    648 
    649 char HexDigitToInt(wchar_t c) {
    650   DCHECK(IsHexDigit(c));
    651   if (c >= '0' && c <= '9')
    652     return static_cast<char>(c - '0');
    653   if (c >= 'A' && c <= 'F')
    654     return static_cast<char>(c - 'A' + 10);
    655   if (c >= 'a' && c <= 'f')
    656     return static_cast<char>(c - 'a' + 10);
    657   return 0;
    658 }
    659 
    660 bool IsUnicodeWhitespace(wchar_t c) {
    661   // kWhitespaceWide is a NULL-terminated string
    662   for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
    663     if (*cur == c)
    664       return true;
    665   }
    666   return false;
    667 }
    668 
    669 static const char* const kByteStringsUnlocalized[] = {
    670   " B",
    671   " kB",
    672   " MB",
    673   " GB",
    674   " TB",
    675   " PB"
    676 };
    677 
    678 string16 FormatBytesUnlocalized(int64_t bytes) {
    679   double unit_amount = static_cast<double>(bytes);
    680   size_t dimension = 0;
    681   const int kKilo = 1024;
    682   while (unit_amount >= kKilo &&
    683          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    684     unit_amount /= kKilo;
    685     dimension++;
    686   }
    687 
    688   char buf[64];
    689   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    690     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    691                    kByteStringsUnlocalized[dimension]);
    692   } else {
    693     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    694                    kByteStringsUnlocalized[dimension]);
    695   }
    696 
    697   return ASCIIToUTF16(buf);
    698 }
    699 
    700 // Runs in O(n) time in the length of |str|.
    701 template<class StringType>
    702 void DoReplaceSubstringsAfterOffset(StringType* str,
    703                                     size_t offset,
    704                                     BasicStringPiece<StringType> find_this,
    705                                     BasicStringPiece<StringType> replace_with,
    706                                     bool replace_all) {
    707   DCHECK(!find_this.empty());
    708 
    709   // If the find string doesn't appear, there's nothing to do.
    710   offset = str->find(find_this.data(), offset, find_this.size());
    711   if (offset == StringType::npos)
    712     return;
    713 
    714   // If we're only replacing one instance, there's no need to do anything
    715   // complicated.
    716   size_t find_length = find_this.length();
    717   if (!replace_all) {
    718     str->replace(offset, find_length, replace_with.data(), replace_with.size());
    719     return;
    720   }
    721 
    722   // If the find and replace strings are the same length, we can simply use
    723   // replace() on each instance, and finish the entire operation in O(n) time.
    724   size_t replace_length = replace_with.length();
    725   if (find_length == replace_length) {
    726     do {
    727       str->replace(offset, find_length,
    728                    replace_with.data(), replace_with.size());
    729       offset = str->find(find_this.data(), offset + replace_length,
    730                          find_this.size());
    731     } while (offset != StringType::npos);
    732     return;
    733   }
    734 
    735   // Since the find and replace strings aren't the same length, a loop like the
    736   // one above would be O(n^2) in the worst case, as replace() will shift the
    737   // entire remaining string each time.  We need to be more clever to keep
    738   // things O(n).
    739   //
    740   // If we're shortening the string, we can alternate replacements with shifting
    741   // forward the intervening characters using memmove().
    742   size_t str_length = str->length();
    743   if (find_length > replace_length) {
    744     size_t write_offset = offset;
    745     do {
    746       if (replace_length) {
    747         str->replace(write_offset, replace_length,
    748                      replace_with.data(), replace_with.size());
    749         write_offset += replace_length;
    750       }
    751       size_t read_offset = offset + find_length;
    752       offset = std::min(
    753           str->find(find_this.data(), read_offset, find_this.size()),
    754           str_length);
    755       size_t length = offset - read_offset;
    756       if (length) {
    757         memmove(&(*str)[write_offset], &(*str)[read_offset],
    758                 length * sizeof(typename StringType::value_type));
    759         write_offset += length;
    760       }
    761     } while (offset < str_length);
    762     str->resize(write_offset);
    763     return;
    764   }
    765 
    766   // We're lengthening the string.  We can use alternating replacements and
    767   // memmove() calls like above, but we need to precalculate the final string
    768   // length and then expand from back-to-front to avoid overwriting the string
    769   // as we're reading it, needing to shift, or having to copy to a second string
    770   // temporarily.
    771   size_t first_match = offset;
    772 
    773   // First, calculate the final length and resize the string.
    774   size_t final_length = str_length;
    775   size_t expansion = replace_length - find_length;
    776   size_t current_match;
    777   do {
    778     final_length += expansion;
    779     // Minor optimization: save this offset into |current_match|, so that on
    780     // exit from the loop, |current_match| will point at the last instance of
    781     // the find string, and we won't need to find() it again immediately.
    782     current_match = offset;
    783     offset = str->find(find_this.data(), offset + find_length,
    784                        find_this.size());
    785   } while (offset != StringType::npos);
    786   str->resize(final_length);
    787 
    788   // Now do the replacement loop, working backwards through the string.
    789   for (size_t prev_match = str_length, write_offset = final_length; ;
    790        current_match = str->rfind(find_this.data(), current_match - 1,
    791                                   find_this.size())) {
    792     size_t read_offset = current_match + find_length;
    793     size_t length = prev_match - read_offset;
    794     if (length) {
    795       write_offset -= length;
    796       memmove(&(*str)[write_offset], &(*str)[read_offset],
    797               length * sizeof(typename StringType::value_type));
    798     }
    799     write_offset -= replace_length;
    800     str->replace(write_offset, replace_length,
    801                  replace_with.data(), replace_with.size());
    802     if (current_match == first_match)
    803       return;
    804     prev_match = current_match;
    805   }
    806 }
    807 
    808 void ReplaceFirstSubstringAfterOffset(string16* str,
    809                                       size_t start_offset,
    810                                       StringPiece16 find_this,
    811                                       StringPiece16 replace_with) {
    812   DoReplaceSubstringsAfterOffset<string16>(
    813       str, start_offset, find_this, replace_with, false);  // Replace first.
    814 }
    815 
    816 void ReplaceFirstSubstringAfterOffset(std::string* str,
    817                                       size_t start_offset,
    818                                       StringPiece find_this,
    819                                       StringPiece replace_with) {
    820   DoReplaceSubstringsAfterOffset<std::string>(
    821       str, start_offset, find_this, replace_with, false);  // Replace first.
    822 }
    823 
    824 void ReplaceSubstringsAfterOffset(string16* str,
    825                                   size_t start_offset,
    826                                   StringPiece16 find_this,
    827                                   StringPiece16 replace_with) {
    828   DoReplaceSubstringsAfterOffset<string16>(
    829       str, start_offset, find_this, replace_with, true);  // Replace all.
    830 }
    831 
    832 void ReplaceSubstringsAfterOffset(std::string* str,
    833                                   size_t start_offset,
    834                                   StringPiece find_this,
    835                                   StringPiece replace_with) {
    836   DoReplaceSubstringsAfterOffset<std::string>(
    837       str, start_offset, find_this, replace_with, true);  // Replace all.
    838 }
    839 
    840 template <class string_type>
    841 inline typename string_type::value_type* WriteIntoT(string_type* str,
    842                                                     size_t length_with_null) {
    843   DCHECK_GT(length_with_null, 1u);
    844   str->reserve(length_with_null);
    845   str->resize(length_with_null - 1);
    846   return &((*str)[0]);
    847 }
    848 
    849 char* WriteInto(std::string* str, size_t length_with_null) {
    850   return WriteIntoT(str, length_with_null);
    851 }
    852 
    853 char16* WriteInto(string16* str, size_t length_with_null) {
    854   return WriteIntoT(str, length_with_null);
    855 }
    856 
    857 template<typename STR>
    858 static STR JoinStringT(const std::vector<STR>& parts,
    859                        BasicStringPiece<STR> sep) {
    860   if (parts.empty())
    861     return STR();
    862 
    863   STR result(parts[0]);
    864   auto iter = parts.begin();
    865   ++iter;
    866 
    867   for (; iter != parts.end(); ++iter) {
    868     sep.AppendToString(&result);
    869     result += *iter;
    870   }
    871 
    872   return result;
    873 }
    874 
    875 std::string JoinString(const std::vector<std::string>& parts,
    876                        StringPiece separator) {
    877   return JoinStringT(parts, separator);
    878 }
    879 
    880 string16 JoinString(const std::vector<string16>& parts,
    881                     StringPiece16 separator) {
    882   return JoinStringT(parts, separator);
    883 }
    884 
    885 template<class FormatStringType, class OutStringType>
    886 OutStringType DoReplaceStringPlaceholders(
    887     const FormatStringType& format_string,
    888     const std::vector<OutStringType>& subst,
    889     std::vector<size_t>* offsets) {
    890   size_t substitutions = subst.size();
    891 
    892   size_t sub_length = 0;
    893   for (const auto& cur : subst)
    894     sub_length += cur.length();
    895 
    896   OutStringType formatted;
    897   formatted.reserve(format_string.length() + sub_length);
    898 
    899   std::vector<ReplacementOffset> r_offsets;
    900   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
    901     if ('$' == *i) {
    902       if (i + 1 != format_string.end()) {
    903         ++i;
    904         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
    905         if ('$' == *i) {
    906           while (i != format_string.end() && '$' == *i) {
    907             formatted.push_back('$');
    908             ++i;
    909           }
    910           --i;
    911         } else {
    912           uintptr_t index = 0;
    913           while (i != format_string.end() && '0' <= *i && *i <= '9') {
    914             index *= 10;
    915             index += *i - '0';
    916             ++i;
    917           }
    918           --i;
    919           index -= 1;
    920           if (offsets) {
    921             ReplacementOffset r_offset(index,
    922                 static_cast<int>(formatted.size()));
    923             r_offsets.insert(std::lower_bound(r_offsets.begin(),
    924                                               r_offsets.end(),
    925                                               r_offset,
    926                                               &CompareParameter),
    927                              r_offset);
    928           }
    929           if (index < substitutions)
    930             formatted.append(subst.at(index));
    931         }
    932       }
    933     } else {
    934       formatted.push_back(*i);
    935     }
    936   }
    937   if (offsets) {
    938     for (const auto& cur : r_offsets)
    939       offsets->push_back(cur.offset);
    940   }
    941   return formatted;
    942 }
    943 
    944 string16 ReplaceStringPlaceholders(const string16& format_string,
    945                                    const std::vector<string16>& subst,
    946                                    std::vector<size_t>* offsets) {
    947   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    948 }
    949 
    950 std::string ReplaceStringPlaceholders(const StringPiece& format_string,
    951                                       const std::vector<std::string>& subst,
    952                                       std::vector<size_t>* offsets) {
    953   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    954 }
    955 
    956 string16 ReplaceStringPlaceholders(const string16& format_string,
    957                                    const string16& a,
    958                                    size_t* offset) {
    959   std::vector<size_t> offsets;
    960   std::vector<string16> subst;
    961   subst.push_back(a);
    962   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
    963 
    964   DCHECK_EQ(1U, offsets.size());
    965   if (offset)
    966     *offset = offsets[0];
    967   return result;
    968 }
    969 
    970 // The following code is compatible with the OpenBSD lcpy interface.  See:
    971 //   http://www.gratisoft.us/todd/papers/strlcpy.html
    972 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
    973 
    974 namespace {
    975 
    976 template <typename CHAR>
    977 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
    978   for (size_t i = 0; i < dst_size; ++i) {
    979     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
    980       return i;
    981   }
    982 
    983   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
    984   if (dst_size != 0)
    985     dst[dst_size - 1] = 0;
    986 
    987   // Count the rest of the |src|, and return it's length in characters.
    988   while (src[dst_size]) ++dst_size;
    989   return dst_size;
    990 }
    991 
    992 }  // namespace
    993 
    994 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
    995   return lcpyT<char>(dst, src, dst_size);
    996 }
    997 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
    998   return lcpyT<wchar_t>(dst, src, dst_size);
    999 }
   1000 
   1001 }  // namespace base
   1002