Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <math.h>
     10 #include <stdarg.h>
     11 #include <stdint.h>
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 #include <time.h>
     16 #include <wchar.h>
     17 #include <wctype.h>
     18 
     19 #include <algorithm>
     20 #include <limits>
     21 #include <vector>
     22 
     23 #include "base/logging.h"
     24 #include "base/macros.h"
     25 #include "base/memory/singleton.h"
     26 #include "base/strings/utf_string_conversion_utils.h"
     27 #include "base/strings/utf_string_conversions.h"
     28 #include "base/third_party/icu/icu_utf.h"
     29 #include "build/build_config.h"
     30 
     31 namespace base {
     32 
     33 namespace {
     34 
     35 // Force the singleton used by EmptyString[16] to be a unique type. This
     36 // prevents other code that might accidentally use Singleton<string> from
     37 // getting our internal one.
     38 struct EmptyStrings {
     39   EmptyStrings() {}
     40   const std::string s;
     41   const string16 s16;
     42 
     43   static EmptyStrings* GetInstance() {
     44     return Singleton<EmptyStrings>::get();
     45   }
     46 };
     47 
     48 // Used by ReplaceStringPlaceholders to track the position in the string of
     49 // replaced parameters.
     50 struct ReplacementOffset {
     51   ReplacementOffset(uintptr_t parameter, size_t offset)
     52       : parameter(parameter),
     53         offset(offset) {}
     54 
     55   // Index of the parameter.
     56   uintptr_t parameter;
     57 
     58   // Starting position in the string.
     59   size_t offset;
     60 };
     61 
     62 static bool CompareParameter(const ReplacementOffset& elem1,
     63                              const ReplacementOffset& elem2) {
     64   return elem1.parameter < elem2.parameter;
     65 }
     66 
     67 // Overloaded function to append one string onto the end of another. Having a
     68 // separate overload for |source| as both string and StringPiece allows for more
     69 // efficient usage from functions templated to work with either type (avoiding a
     70 // redundant call to the BasicStringPiece constructor in both cases).
     71 template <typename string_type>
     72 inline void AppendToString(string_type* target, const string_type& source) {
     73   target->append(source);
     74 }
     75 
     76 template <typename string_type>
     77 inline void AppendToString(string_type* target,
     78                            const BasicStringPiece<string_type>& source) {
     79   source.AppendToString(target);
     80 }
     81 
     82 // Assuming that a pointer is the size of a "machine word", then
     83 // uintptr_t is an integer type that is also a machine word.
     84 typedef uintptr_t MachineWord;
     85 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
     86 
     87 inline bool IsAlignedToMachineWord(const void* pointer) {
     88   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
     89 }
     90 
     91 template<typename T> inline T* AlignToMachineWord(T* pointer) {
     92   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
     93                               ~kMachineWordAlignmentMask);
     94 }
     95 
     96 template<size_t size, typename CharacterType> struct NonASCIIMask;
     97 template<> struct NonASCIIMask<4, char16> {
     98     static inline uint32_t value() { return 0xFF80FF80U; }
     99 };
    100 template<> struct NonASCIIMask<4, char> {
    101     static inline uint32_t value() { return 0x80808080U; }
    102 };
    103 template<> struct NonASCIIMask<8, char16> {
    104     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
    105 };
    106 template<> struct NonASCIIMask<8, char> {
    107     static inline uint64_t value() { return 0x8080808080808080ULL; }
    108 };
    109 #if defined(WCHAR_T_IS_UTF32)
    110 template<> struct NonASCIIMask<4, wchar_t> {
    111     static inline uint32_t value() { return 0xFFFFFF80U; }
    112 };
    113 template<> struct NonASCIIMask<8, wchar_t> {
    114     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
    115 };
    116 #endif  // WCHAR_T_IS_UTF32
    117 
    118 }  // namespace
    119 
    120 bool IsWprintfFormatPortable(const wchar_t* format) {
    121   for (const wchar_t* position = format; *position != '\0'; ++position) {
    122     if (*position == '%') {
    123       bool in_specification = true;
    124       bool modifier_l = false;
    125       while (in_specification) {
    126         // Eat up characters until reaching a known specifier.
    127         if (*++position == '\0') {
    128           // The format string ended in the middle of a specification.  Call
    129           // it portable because no unportable specifications were found.  The
    130           // string is equally broken on all platforms.
    131           return true;
    132         }
    133 
    134         if (*position == 'l') {
    135           // 'l' is the only thing that can save the 's' and 'c' specifiers.
    136           modifier_l = true;
    137         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
    138                    *position == 'S' || *position == 'C' || *position == 'F' ||
    139                    *position == 'D' || *position == 'O' || *position == 'U') {
    140           // Not portable.
    141           return false;
    142         }
    143 
    144         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
    145           // Portable, keep scanning the rest of the format string.
    146           in_specification = false;
    147         }
    148       }
    149     }
    150   }
    151 
    152   return true;
    153 }
    154 
    155 namespace {
    156 
    157 template<typename StringType>
    158 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
    159   StringType ret;
    160   ret.reserve(str.size());
    161   for (size_t i = 0; i < str.size(); i++)
    162     ret.push_back(ToLowerASCII(str[i]));
    163   return ret;
    164 }
    165 
    166 template<typename StringType>
    167 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
    168   StringType ret;
    169   ret.reserve(str.size());
    170   for (size_t i = 0; i < str.size(); i++)
    171     ret.push_back(ToUpperASCII(str[i]));
    172   return ret;
    173 }
    174 
    175 }  // namespace
    176 
    177 std::string ToLowerASCII(StringPiece str) {
    178   return ToLowerASCIIImpl<std::string>(str);
    179 }
    180 
    181 string16 ToLowerASCII(StringPiece16 str) {
    182   return ToLowerASCIIImpl<string16>(str);
    183 }
    184 
    185 std::string ToUpperASCII(StringPiece str) {
    186   return ToUpperASCIIImpl<std::string>(str);
    187 }
    188 
    189 string16 ToUpperASCII(StringPiece16 str) {
    190   return ToUpperASCIIImpl<string16>(str);
    191 }
    192 
    193 template<class StringType>
    194 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
    195                                  BasicStringPiece<StringType> b) {
    196   // Find the first characters that aren't equal and compare them.  If the end
    197   // of one of the strings is found before a nonequal character, the lengths
    198   // of the strings are compared.
    199   size_t i = 0;
    200   while (i < a.length() && i < b.length()) {
    201     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
    202     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
    203     if (lower_a < lower_b)
    204       return -1;
    205     if (lower_a > lower_b)
    206       return 1;
    207     i++;
    208   }
    209 
    210   // End of one string hit before finding a different character. Expect the
    211   // common case to be "strings equal" at this point so check that first.
    212   if (a.length() == b.length())
    213     return 0;
    214 
    215   if (a.length() < b.length())
    216     return -1;
    217   return 1;
    218 }
    219 
    220 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    221   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
    222 }
    223 
    224 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    225   return CompareCaseInsensitiveASCIIT<string16>(a, b);
    226 }
    227 
    228 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
    229   if (a.length() != b.length())
    230     return false;
    231   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
    232 }
    233 
    234 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
    235   if (a.length() != b.length())
    236     return false;
    237   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
    238 }
    239 
    240 const std::string& EmptyString() {
    241   return EmptyStrings::GetInstance()->s;
    242 }
    243 
    244 const string16& EmptyString16() {
    245   return EmptyStrings::GetInstance()->s16;
    246 }
    247 
    248 template<typename STR>
    249 bool ReplaceCharsT(const STR& input,
    250                    const STR& replace_chars,
    251                    const STR& replace_with,
    252                    STR* output) {
    253   bool removed = false;
    254   size_t replace_length = replace_with.length();
    255 
    256   *output = input;
    257 
    258   size_t found = output->find_first_of(replace_chars);
    259   while (found != STR::npos) {
    260     removed = true;
    261     output->replace(found, 1, replace_with);
    262     found = output->find_first_of(replace_chars, found + replace_length);
    263   }
    264 
    265   return removed;
    266 }
    267 
    268 bool ReplaceChars(const string16& input,
    269                   const StringPiece16& replace_chars,
    270                   const string16& replace_with,
    271                   string16* output) {
    272   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    273 }
    274 
    275 bool ReplaceChars(const std::string& input,
    276                   const StringPiece& replace_chars,
    277                   const std::string& replace_with,
    278                   std::string* output) {
    279   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
    280 }
    281 
    282 bool RemoveChars(const string16& input,
    283                  const StringPiece16& remove_chars,
    284                  string16* output) {
    285   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
    286 }
    287 
    288 bool RemoveChars(const std::string& input,
    289                  const StringPiece& remove_chars,
    290                  std::string* output) {
    291   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
    292 }
    293 
    294 template<typename Str>
    295 TrimPositions TrimStringT(const Str& input,
    296                           BasicStringPiece<Str> trim_chars,
    297                           TrimPositions positions,
    298                           Str* output) {
    299   // Find the edges of leading/trailing whitespace as desired. Need to use
    300   // a StringPiece version of input to be able to call find* on it with the
    301   // StringPiece version of trim_chars (normally the trim_chars will be a
    302   // constant so avoid making a copy).
    303   BasicStringPiece<Str> input_piece(input);
    304   const size_t last_char = input.length() - 1;
    305   const size_t first_good_char = (positions & TRIM_LEADING) ?
    306       input_piece.find_first_not_of(trim_chars) : 0;
    307   const size_t last_good_char = (positions & TRIM_TRAILING) ?
    308       input_piece.find_last_not_of(trim_chars) : last_char;
    309 
    310   // When the string was all trimmed, report that we stripped off characters
    311   // from whichever position the caller was interested in. For empty input, we
    312   // stripped no characters, but we still need to clear |output|.
    313   if (input.empty() ||
    314       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
    315     bool input_was_empty = input.empty();  // in case output == &input
    316     output->clear();
    317     return input_was_empty ? TRIM_NONE : positions;
    318   }
    319 
    320   // Trim.
    321   *output =
    322       input.substr(first_good_char, last_good_char - first_good_char + 1);
    323 
    324   // Return where we trimmed from.
    325   return static_cast<TrimPositions>(
    326       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
    327       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
    328 }
    329 
    330 bool TrimString(const string16& input,
    331                 StringPiece16 trim_chars,
    332                 string16* output) {
    333   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    334 }
    335 
    336 bool TrimString(const std::string& input,
    337                 StringPiece trim_chars,
    338                 std::string* output) {
    339   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
    340 }
    341 
    342 template<typename Str>
    343 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
    344                                        BasicStringPiece<Str> trim_chars,
    345                                        TrimPositions positions) {
    346   size_t begin = (positions & TRIM_LEADING) ?
    347       input.find_first_not_of(trim_chars) : 0;
    348   size_t end = (positions & TRIM_TRAILING) ?
    349       input.find_last_not_of(trim_chars) + 1 : input.size();
    350   return input.substr(begin, end - begin);
    351 }
    352 
    353 StringPiece16 TrimString(StringPiece16 input,
    354                          const StringPiece16& trim_chars,
    355                          TrimPositions positions) {
    356   return TrimStringPieceT(input, trim_chars, positions);
    357 }
    358 
    359 StringPiece TrimString(StringPiece input,
    360                        const StringPiece& trim_chars,
    361                        TrimPositions positions) {
    362   return TrimStringPieceT(input, trim_chars, positions);
    363 }
    364 
    365 void TruncateUTF8ToByteSize(const std::string& input,
    366                             const size_t byte_size,
    367                             std::string* output) {
    368   DCHECK(output);
    369   if (byte_size > input.length()) {
    370     *output = input;
    371     return;
    372   }
    373   DCHECK_LE(byte_size,
    374             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
    375   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
    376   int32_t truncation_length = static_cast<int32_t>(byte_size);
    377   int32_t char_index = truncation_length - 1;
    378   const char* data = input.data();
    379 
    380   // Using CBU8, we will move backwards from the truncation point
    381   // to the beginning of the string looking for a valid UTF8
    382   // character.  Once a full UTF8 character is found, we will
    383   // truncate the string to the end of that character.
    384   while (char_index >= 0) {
    385     int32_t prev = char_index;
    386     base_icu::UChar32 code_point = 0;
    387     CBU8_NEXT(data, char_index, truncation_length, code_point);
    388     if (!IsValidCharacter(code_point) ||
    389         !IsValidCodepoint(code_point)) {
    390       char_index = prev - 1;
    391     } else {
    392       break;
    393     }
    394   }
    395 
    396   if (char_index >= 0 )
    397     *output = input.substr(0, char_index);
    398   else
    399     output->clear();
    400 }
    401 
    402 TrimPositions TrimWhitespace(const string16& input,
    403                              TrimPositions positions,
    404                              string16* output) {
    405   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
    406 }
    407 
    408 StringPiece16 TrimWhitespace(StringPiece16 input,
    409                              TrimPositions positions) {
    410   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
    411 }
    412 
    413 TrimPositions TrimWhitespaceASCII(const std::string& input,
    414                                   TrimPositions positions,
    415                                   std::string* output) {
    416   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
    417 }
    418 
    419 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
    420   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
    421 }
    422 
    423 template<typename STR>
    424 STR CollapseWhitespaceT(const STR& text,
    425                         bool trim_sequences_with_line_breaks) {
    426   STR result;
    427   result.resize(text.size());
    428 
    429   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
    430   // will trim any leading whitespace.
    431   bool in_whitespace = true;
    432   bool already_trimmed = true;
    433 
    434   int chars_written = 0;
    435   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    436     if (IsUnicodeWhitespace(*i)) {
    437       if (!in_whitespace) {
    438         // Reduce all whitespace sequences to a single space.
    439         in_whitespace = true;
    440         result[chars_written++] = L' ';
    441       }
    442       if (trim_sequences_with_line_breaks && !already_trimmed &&
    443           ((*i == '\n') || (*i == '\r'))) {
    444         // Whitespace sequences containing CR or LF are eliminated entirely.
    445         already_trimmed = true;
    446         --chars_written;
    447       }
    448     } else {
    449       // Non-whitespace chracters are copied straight across.
    450       in_whitespace = false;
    451       already_trimmed = false;
    452       result[chars_written++] = *i;
    453     }
    454   }
    455 
    456   if (in_whitespace && !already_trimmed) {
    457     // Any trailing whitespace is eliminated.
    458     --chars_written;
    459   }
    460 
    461   result.resize(chars_written);
    462   return result;
    463 }
    464 
    465 string16 CollapseWhitespace(const string16& text,
    466                             bool trim_sequences_with_line_breaks) {
    467   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    468 }
    469 
    470 std::string CollapseWhitespaceASCII(const std::string& text,
    471                                     bool trim_sequences_with_line_breaks) {
    472   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
    473 }
    474 
    475 bool ContainsOnlyChars(const StringPiece& input,
    476                        const StringPiece& characters) {
    477   return input.find_first_not_of(characters) == StringPiece::npos;
    478 }
    479 
    480 bool ContainsOnlyChars(const StringPiece16& input,
    481                        const StringPiece16& characters) {
    482   return input.find_first_not_of(characters) == StringPiece16::npos;
    483 }
    484 
    485 template <class Char>
    486 inline bool DoIsStringASCII(const Char* characters, size_t length) {
    487   MachineWord all_char_bits = 0;
    488   const Char* end = characters + length;
    489 
    490   // Prologue: align the input.
    491   while (!IsAlignedToMachineWord(characters) && characters != end) {
    492     all_char_bits |= *characters;
    493     ++characters;
    494   }
    495 
    496   // Compare the values of CPU word size.
    497   const Char* word_end = AlignToMachineWord(end);
    498   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
    499   while (characters < word_end) {
    500     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
    501     characters += loop_increment;
    502   }
    503 
    504   // Process the remaining bytes.
    505   while (characters != end) {
    506     all_char_bits |= *characters;
    507     ++characters;
    508   }
    509 
    510   MachineWord non_ascii_bit_mask =
    511       NonASCIIMask<sizeof(MachineWord), Char>::value();
    512   return !(all_char_bits & non_ascii_bit_mask);
    513 }
    514 
    515 bool IsStringASCII(const StringPiece& str) {
    516   return DoIsStringASCII(str.data(), str.length());
    517 }
    518 
    519 bool IsStringASCII(const StringPiece16& str) {
    520   return DoIsStringASCII(str.data(), str.length());
    521 }
    522 
    523 bool IsStringASCII(const string16& str) {
    524   return DoIsStringASCII(str.data(), str.length());
    525 }
    526 
    527 #if defined(WCHAR_T_IS_UTF32)
    528 bool IsStringASCII(const std::wstring& str) {
    529   return DoIsStringASCII(str.data(), str.length());
    530 }
    531 #endif
    532 
    533 bool IsStringUTF8(const StringPiece& str) {
    534   const char *src = str.data();
    535   int32_t src_len = static_cast<int32_t>(str.length());
    536   int32_t char_index = 0;
    537 
    538   while (char_index < src_len) {
    539     int32_t code_point;
    540     CBU8_NEXT(src, char_index, src_len, code_point);
    541     if (!IsValidCharacter(code_point))
    542       return false;
    543   }
    544   return true;
    545 }
    546 
    547 // Implementation note: Normally this function will be called with a hardcoded
    548 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
    549 // a C constant requires running strlen, so the result will be two passes
    550 // through the buffers, one to file the length of lowercase_ascii, and one to
    551 // compare each letter.
    552 //
    553 // This function could have taken a const char* to avoid this and only do one
    554 // pass through the string. But the strlen is faster than the case-insensitive
    555 // compares and lets us early-exit in the case that the strings are different
    556 // lengths (will often be the case for non-matches). So whether one approach or
    557 // the other will be faster depends on the case.
    558 //
    559 // The hardcoded strings are typically very short so it doesn't matter, and the
    560 // string piece gives additional flexibility for the caller (doesn't have to be
    561 // null terminated) so we choose the StringPiece route.
    562 template<typename Str>
    563 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
    564                                           StringPiece lowercase_ascii) {
    565   if (str.size() != lowercase_ascii.size())
    566     return false;
    567   for (size_t i = 0; i < str.size(); i++) {
    568     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
    569       return false;
    570   }
    571   return true;
    572 }
    573 
    574 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
    575   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
    576 }
    577 
    578 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
    579   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
    580 }
    581 
    582 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
    583   if (str.length() != ascii.length())
    584     return false;
    585   return std::equal(ascii.begin(), ascii.end(), str.begin());
    586 }
    587 
    588 template<typename Str>
    589 bool StartsWithT(BasicStringPiece<Str> str,
    590                  BasicStringPiece<Str> search_for,
    591                  CompareCase case_sensitivity) {
    592   if (search_for.size() > str.size())
    593     return false;
    594 
    595   BasicStringPiece<Str> source = str.substr(0, search_for.size());
    596 
    597   switch (case_sensitivity) {
    598     case CompareCase::SENSITIVE:
    599       return source == search_for;
    600 
    601     case CompareCase::INSENSITIVE_ASCII:
    602       return std::equal(
    603           search_for.begin(), search_for.end(),
    604           source.begin(),
    605           CaseInsensitiveCompareASCII<typename Str::value_type>());
    606 
    607     default:
    608       NOTREACHED();
    609       return false;
    610   }
    611 }
    612 
    613 bool StartsWith(StringPiece str,
    614                 StringPiece search_for,
    615                 CompareCase case_sensitivity) {
    616   return StartsWithT<std::string>(str, search_for, case_sensitivity);
    617 }
    618 
    619 bool StartsWith(StringPiece16 str,
    620                 StringPiece16 search_for,
    621                 CompareCase case_sensitivity) {
    622   return StartsWithT<string16>(str, search_for, case_sensitivity);
    623 }
    624 
    625 template <typename Str>
    626 bool EndsWithT(BasicStringPiece<Str> str,
    627                BasicStringPiece<Str> search_for,
    628                CompareCase case_sensitivity) {
    629   if (search_for.size() > str.size())
    630     return false;
    631 
    632   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
    633                                             search_for.size());
    634 
    635   switch (case_sensitivity) {
    636     case CompareCase::SENSITIVE:
    637       return source == search_for;
    638 
    639     case CompareCase::INSENSITIVE_ASCII:
    640       return std::equal(
    641           source.begin(), source.end(),
    642           search_for.begin(),
    643           CaseInsensitiveCompareASCII<typename Str::value_type>());
    644 
    645     default:
    646       NOTREACHED();
    647       return false;
    648   }
    649 }
    650 
    651 bool EndsWith(StringPiece str,
    652               StringPiece search_for,
    653               CompareCase case_sensitivity) {
    654   return EndsWithT<std::string>(str, search_for, case_sensitivity);
    655 }
    656 
    657 bool EndsWith(StringPiece16 str,
    658               StringPiece16 search_for,
    659               CompareCase case_sensitivity) {
    660   return EndsWithT<string16>(str, search_for, case_sensitivity);
    661 }
    662 
    663 char HexDigitToInt(wchar_t c) {
    664   DCHECK(IsHexDigit(c));
    665   if (c >= '0' && c <= '9')
    666     return static_cast<char>(c - '0');
    667   if (c >= 'A' && c <= 'F')
    668     return static_cast<char>(c - 'A' + 10);
    669   if (c >= 'a' && c <= 'f')
    670     return static_cast<char>(c - 'a' + 10);
    671   return 0;
    672 }
    673 
    674 bool IsUnicodeWhitespace(wchar_t c) {
    675   // kWhitespaceWide is a NULL-terminated string
    676   for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
    677     if (*cur == c)
    678       return true;
    679   }
    680   return false;
    681 }
    682 
    683 static const char* const kByteStringsUnlocalized[] = {
    684   " B",
    685   " kB",
    686   " MB",
    687   " GB",
    688   " TB",
    689   " PB"
    690 };
    691 
    692 string16 FormatBytesUnlocalized(int64_t bytes) {
    693   double unit_amount = static_cast<double>(bytes);
    694   size_t dimension = 0;
    695   const int kKilo = 1024;
    696   while (unit_amount >= kKilo &&
    697          dimension < arraysize(kByteStringsUnlocalized) - 1) {
    698     unit_amount /= kKilo;
    699     dimension++;
    700   }
    701 
    702   char buf[64];
    703   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
    704     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
    705                    kByteStringsUnlocalized[dimension]);
    706   } else {
    707     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
    708                    kByteStringsUnlocalized[dimension]);
    709   }
    710 
    711   return ASCIIToUTF16(buf);
    712 }
    713 
    714 // Runs in O(n) time in the length of |str|.
    715 template<class StringType>
    716 void DoReplaceSubstringsAfterOffset(StringType* str,
    717                                     size_t offset,
    718                                     BasicStringPiece<StringType> find_this,
    719                                     BasicStringPiece<StringType> replace_with,
    720                                     bool replace_all) {
    721   DCHECK(!find_this.empty());
    722 
    723   // If the find string doesn't appear, there's nothing to do.
    724   offset = str->find(find_this.data(), offset, find_this.size());
    725   if (offset == StringType::npos)
    726     return;
    727 
    728   // If we're only replacing one instance, there's no need to do anything
    729   // complicated.
    730   size_t find_length = find_this.length();
    731   if (!replace_all) {
    732     str->replace(offset, find_length, replace_with.data(), replace_with.size());
    733     return;
    734   }
    735 
    736   // If the find and replace strings are the same length, we can simply use
    737   // replace() on each instance, and finish the entire operation in O(n) time.
    738   size_t replace_length = replace_with.length();
    739   if (find_length == replace_length) {
    740     do {
    741       str->replace(offset, find_length,
    742                    replace_with.data(), replace_with.size());
    743       offset = str->find(find_this.data(), offset + replace_length,
    744                          find_this.size());
    745     } while (offset != StringType::npos);
    746     return;
    747   }
    748 
    749   // Since the find and replace strings aren't the same length, a loop like the
    750   // one above would be O(n^2) in the worst case, as replace() will shift the
    751   // entire remaining string each time.  We need to be more clever to keep
    752   // things O(n).
    753   //
    754   // If we're shortening the string, we can alternate replacements with shifting
    755   // forward the intervening characters using memmove().
    756   size_t str_length = str->length();
    757   if (find_length > replace_length) {
    758     size_t write_offset = offset;
    759     do {
    760       if (replace_length) {
    761         str->replace(write_offset, replace_length,
    762                      replace_with.data(), replace_with.size());
    763         write_offset += replace_length;
    764       }
    765       size_t read_offset = offset + find_length;
    766       offset = std::min(
    767           str->find(find_this.data(), read_offset, find_this.size()),
    768           str_length);
    769       size_t length = offset - read_offset;
    770       if (length) {
    771         memmove(&(*str)[write_offset], &(*str)[read_offset],
    772                 length * sizeof(typename StringType::value_type));
    773         write_offset += length;
    774       }
    775     } while (offset < str_length);
    776     str->resize(write_offset);
    777     return;
    778   }
    779 
    780   // We're lengthening the string.  We can use alternating replacements and
    781   // memmove() calls like above, but we need to precalculate the final string
    782   // length and then expand from back-to-front to avoid overwriting the string
    783   // as we're reading it, needing to shift, or having to copy to a second string
    784   // temporarily.
    785   size_t first_match = offset;
    786 
    787   // First, calculate the final length and resize the string.
    788   size_t final_length = str_length;
    789   size_t expansion = replace_length - find_length;
    790   size_t current_match;
    791   do {
    792     final_length += expansion;
    793     // Minor optimization: save this offset into |current_match|, so that on
    794     // exit from the loop, |current_match| will point at the last instance of
    795     // the find string, and we won't need to find() it again immediately.
    796     current_match = offset;
    797     offset = str->find(find_this.data(), offset + find_length,
    798                        find_this.size());
    799   } while (offset != StringType::npos);
    800   str->resize(final_length);
    801 
    802   // Now do the replacement loop, working backwards through the string.
    803   for (size_t prev_match = str_length, write_offset = final_length; ;
    804        current_match = str->rfind(find_this.data(), current_match - 1,
    805                                   find_this.size())) {
    806     size_t read_offset = current_match + find_length;
    807     size_t length = prev_match - read_offset;
    808     if (length) {
    809       write_offset -= length;
    810       memmove(&(*str)[write_offset], &(*str)[read_offset],
    811               length * sizeof(typename StringType::value_type));
    812     }
    813     write_offset -= replace_length;
    814     str->replace(write_offset, replace_length,
    815                  replace_with.data(), replace_with.size());
    816     if (current_match == first_match)
    817       return;
    818     prev_match = current_match;
    819   }
    820 }
    821 
    822 void ReplaceFirstSubstringAfterOffset(string16* str,
    823                                       size_t start_offset,
    824                                       StringPiece16 find_this,
    825                                       StringPiece16 replace_with) {
    826   DoReplaceSubstringsAfterOffset<string16>(
    827       str, start_offset, find_this, replace_with, false);  // Replace first.
    828 }
    829 
    830 void ReplaceFirstSubstringAfterOffset(std::string* str,
    831                                       size_t start_offset,
    832                                       StringPiece find_this,
    833                                       StringPiece replace_with) {
    834   DoReplaceSubstringsAfterOffset<std::string>(
    835       str, start_offset, find_this, replace_with, false);  // Replace first.
    836 }
    837 
    838 void ReplaceSubstringsAfterOffset(string16* str,
    839                                   size_t start_offset,
    840                                   StringPiece16 find_this,
    841                                   StringPiece16 replace_with) {
    842   DoReplaceSubstringsAfterOffset<string16>(
    843       str, start_offset, find_this, replace_with, true);  // Replace all.
    844 }
    845 
    846 void ReplaceSubstringsAfterOffset(std::string* str,
    847                                   size_t start_offset,
    848                                   StringPiece find_this,
    849                                   StringPiece replace_with) {
    850   DoReplaceSubstringsAfterOffset<std::string>(
    851       str, start_offset, find_this, replace_with, true);  // Replace all.
    852 }
    853 
    854 template <class string_type>
    855 inline typename string_type::value_type* WriteIntoT(string_type* str,
    856                                                     size_t length_with_null) {
    857   DCHECK_GT(length_with_null, 1u);
    858   str->reserve(length_with_null);
    859   str->resize(length_with_null - 1);
    860   return &((*str)[0]);
    861 }
    862 
    863 char* WriteInto(std::string* str, size_t length_with_null) {
    864   return WriteIntoT(str, length_with_null);
    865 }
    866 
    867 char16* WriteInto(string16* str, size_t length_with_null) {
    868   return WriteIntoT(str, length_with_null);
    869 }
    870 
    871 // Generic version for all JoinString overloads. |list_type| must be a sequence
    872 // (std::vector or std::initializer_list) of strings/StringPieces (std::string,
    873 // string16, StringPiece or StringPiece16). |string_type| is either std::string
    874 // or string16.
    875 template <typename list_type, typename string_type>
    876 static string_type JoinStringT(const list_type& parts,
    877                                BasicStringPiece<string_type> sep) {
    878   if (parts.size() == 0)
    879     return string_type();
    880 
    881   // Pre-allocate the eventual size of the string. Start with the size of all of
    882   // the separators (note that this *assumes* parts.size() > 0).
    883   size_t total_size = (parts.size() - 1) * sep.size();
    884   for (const auto& part : parts)
    885     total_size += part.size();
    886   string_type result;
    887   result.reserve(total_size);
    888 
    889   auto iter = parts.begin();
    890   DCHECK(iter != parts.end());
    891   AppendToString(&result, *iter);
    892   ++iter;
    893 
    894   for (; iter != parts.end(); ++iter) {
    895     sep.AppendToString(&result);
    896     // Using the overloaded AppendToString allows this template function to work
    897     // on both strings and StringPieces without creating an intermediate
    898     // StringPiece object.
    899     AppendToString(&result, *iter);
    900   }
    901 
    902   // Sanity-check that we pre-allocated correctly.
    903   DCHECK_EQ(total_size, result.size());
    904 
    905   return result;
    906 }
    907 
    908 std::string JoinString(const std::vector<std::string>& parts,
    909                        StringPiece separator) {
    910   return JoinStringT(parts, separator);
    911 }
    912 
    913 string16 JoinString(const std::vector<string16>& parts,
    914                     StringPiece16 separator) {
    915   return JoinStringT(parts, separator);
    916 }
    917 
    918 std::string JoinString(const std::vector<StringPiece>& parts,
    919                        StringPiece separator) {
    920   return JoinStringT(parts, separator);
    921 }
    922 
    923 string16 JoinString(const std::vector<StringPiece16>& parts,
    924                     StringPiece16 separator) {
    925   return JoinStringT(parts, separator);
    926 }
    927 
    928 std::string JoinString(std::initializer_list<StringPiece> parts,
    929                        StringPiece separator) {
    930   return JoinStringT(parts, separator);
    931 }
    932 
    933 string16 JoinString(std::initializer_list<StringPiece16> parts,
    934                     StringPiece16 separator) {
    935   return JoinStringT(parts, separator);
    936 }
    937 
    938 template<class FormatStringType, class OutStringType>
    939 OutStringType DoReplaceStringPlaceholders(
    940     const FormatStringType& format_string,
    941     const std::vector<OutStringType>& subst,
    942     std::vector<size_t>* offsets) {
    943   size_t substitutions = subst.size();
    944   DCHECK_LT(substitutions, 10U);
    945 
    946   size_t sub_length = 0;
    947   for (const auto& cur : subst)
    948     sub_length += cur.length();
    949 
    950   OutStringType formatted;
    951   formatted.reserve(format_string.length() + sub_length);
    952 
    953   std::vector<ReplacementOffset> r_offsets;
    954   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
    955     if ('$' == *i) {
    956       if (i + 1 != format_string.end()) {
    957         ++i;
    958         if ('$' == *i) {
    959           while (i != format_string.end() && '$' == *i) {
    960             formatted.push_back('$');
    961             ++i;
    962           }
    963           --i;
    964         } else {
    965           if (*i < '1' || *i > '9') {
    966             DLOG(ERROR) << "Invalid placeholder: $" << *i;
    967             continue;
    968           }
    969           uintptr_t index = *i - '1';
    970           if (offsets) {
    971             ReplacementOffset r_offset(index,
    972                 static_cast<int>(formatted.size()));
    973             r_offsets.insert(std::lower_bound(r_offsets.begin(),
    974                                               r_offsets.end(),
    975                                               r_offset,
    976                                               &CompareParameter),
    977                              r_offset);
    978           }
    979           if (index < substitutions)
    980             formatted.append(subst.at(index));
    981         }
    982       }
    983     } else {
    984       formatted.push_back(*i);
    985     }
    986   }
    987   if (offsets) {
    988     for (const auto& cur : r_offsets)
    989       offsets->push_back(cur.offset);
    990   }
    991   return formatted;
    992 }
    993 
    994 string16 ReplaceStringPlaceholders(const string16& format_string,
    995                                    const std::vector<string16>& subst,
    996                                    std::vector<size_t>* offsets) {
    997   return DoReplaceStringPlaceholders(format_string, subst, offsets);
    998 }
    999 
   1000 std::string ReplaceStringPlaceholders(const StringPiece& format_string,
   1001                                       const std::vector<std::string>& subst,
   1002                                       std::vector<size_t>* offsets) {
   1003   return DoReplaceStringPlaceholders(format_string, subst, offsets);
   1004 }
   1005 
   1006 string16 ReplaceStringPlaceholders(const string16& format_string,
   1007                                    const string16& a,
   1008                                    size_t* offset) {
   1009   std::vector<size_t> offsets;
   1010   std::vector<string16> subst;
   1011   subst.push_back(a);
   1012   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
   1013 
   1014   DCHECK_EQ(1U, offsets.size());
   1015   if (offset)
   1016     *offset = offsets[0];
   1017   return result;
   1018 }
   1019 
   1020 // The following code is compatible with the OpenBSD lcpy interface.  See:
   1021 //   http://www.gratisoft.us/todd/papers/strlcpy.html
   1022 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
   1023 
   1024 namespace {
   1025 
   1026 template <typename CHAR>
   1027 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
   1028   for (size_t i = 0; i < dst_size; ++i) {
   1029     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
   1030       return i;
   1031   }
   1032 
   1033   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
   1034   if (dst_size != 0)
   1035     dst[dst_size - 1] = 0;
   1036 
   1037   // Count the rest of the |src|, and return it's length in characters.
   1038   while (src[dst_size]) ++dst_size;
   1039   return dst_size;
   1040 }
   1041 
   1042 }  // namespace
   1043 
   1044 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
   1045   return lcpyT<char>(dst, src, dst_size);
   1046 }
   1047 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
   1048   return lcpyT<wchar_t>(dst, src, dst_size);
   1049 }
   1050 
   1051 }  // namespace base
   1052