1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <math.h> 10 #include <stdarg.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <time.h> 16 #include <wchar.h> 17 #include <wctype.h> 18 19 #include <algorithm> 20 #include <limits> 21 #include <vector> 22 23 #include "base/logging.h" 24 #include "base/macros.h" 25 #include "base/memory/singleton.h" 26 #include "base/strings/utf_string_conversion_utils.h" 27 #include "base/strings/utf_string_conversions.h" 28 #include "base/third_party/icu/icu_utf.h" 29 #include "build/build_config.h" 30 31 namespace base { 32 33 namespace { 34 35 // Force the singleton used by EmptyString[16] to be a unique type. This 36 // prevents other code that might accidentally use Singleton<string> from 37 // getting our internal one. 38 struct EmptyStrings { 39 EmptyStrings() {} 40 const std::string s; 41 const string16 s16; 42 43 static EmptyStrings* GetInstance() { 44 return Singleton<EmptyStrings>::get(); 45 } 46 }; 47 48 // Used by ReplaceStringPlaceholders to track the position in the string of 49 // replaced parameters. 50 struct ReplacementOffset { 51 ReplacementOffset(uintptr_t parameter, size_t offset) 52 : parameter(parameter), 53 offset(offset) {} 54 55 // Index of the parameter. 56 uintptr_t parameter; 57 58 // Starting position in the string. 59 size_t offset; 60 }; 61 62 static bool CompareParameter(const ReplacementOffset& elem1, 63 const ReplacementOffset& elem2) { 64 return elem1.parameter < elem2.parameter; 65 } 66 67 // Assuming that a pointer is the size of a "machine word", then 68 // uintptr_t is an integer type that is also a machine word. 69 typedef uintptr_t MachineWord; 70 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; 71 72 inline bool IsAlignedToMachineWord(const void* pointer) { 73 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); 74 } 75 76 template<typename T> inline T* AlignToMachineWord(T* pointer) { 77 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & 78 ~kMachineWordAlignmentMask); 79 } 80 81 template<size_t size, typename CharacterType> struct NonASCIIMask; 82 template<> struct NonASCIIMask<4, char16> { 83 static inline uint32_t value() { return 0xFF80FF80U; } 84 }; 85 template<> struct NonASCIIMask<4, char> { 86 static inline uint32_t value() { return 0x80808080U; } 87 }; 88 template<> struct NonASCIIMask<8, char16> { 89 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } 90 }; 91 template<> struct NonASCIIMask<8, char> { 92 static inline uint64_t value() { return 0x8080808080808080ULL; } 93 }; 94 #if defined(WCHAR_T_IS_UTF32) 95 template<> struct NonASCIIMask<4, wchar_t> { 96 static inline uint32_t value() { return 0xFFFFFF80U; } 97 }; 98 template<> struct NonASCIIMask<8, wchar_t> { 99 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; } 100 }; 101 #endif // WCHAR_T_IS_UTF32 102 103 } // namespace 104 105 bool IsWprintfFormatPortable(const wchar_t* format) { 106 for (const wchar_t* position = format; *position != '\0'; ++position) { 107 if (*position == '%') { 108 bool in_specification = true; 109 bool modifier_l = false; 110 while (in_specification) { 111 // Eat up characters until reaching a known specifier. 112 if (*++position == '\0') { 113 // The format string ended in the middle of a specification. Call 114 // it portable because no unportable specifications were found. The 115 // string is equally broken on all platforms. 116 return true; 117 } 118 119 if (*position == 'l') { 120 // 'l' is the only thing that can save the 's' and 'c' specifiers. 121 modifier_l = true; 122 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 123 *position == 'S' || *position == 'C' || *position == 'F' || 124 *position == 'D' || *position == 'O' || *position == 'U') { 125 // Not portable. 126 return false; 127 } 128 129 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 130 // Portable, keep scanning the rest of the format string. 131 in_specification = false; 132 } 133 } 134 } 135 } 136 137 return true; 138 } 139 140 namespace { 141 142 template<typename StringType> 143 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { 144 StringType ret; 145 ret.reserve(str.size()); 146 for (size_t i = 0; i < str.size(); i++) 147 ret.push_back(ToLowerASCII(str[i])); 148 return ret; 149 } 150 151 template<typename StringType> 152 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { 153 StringType ret; 154 ret.reserve(str.size()); 155 for (size_t i = 0; i < str.size(); i++) 156 ret.push_back(ToUpperASCII(str[i])); 157 return ret; 158 } 159 160 } // namespace 161 162 std::string ToLowerASCII(StringPiece str) { 163 return ToLowerASCIIImpl<std::string>(str); 164 } 165 166 string16 ToLowerASCII(StringPiece16 str) { 167 return ToLowerASCIIImpl<string16>(str); 168 } 169 170 std::string ToUpperASCII(StringPiece str) { 171 return ToUpperASCIIImpl<std::string>(str); 172 } 173 174 string16 ToUpperASCII(StringPiece16 str) { 175 return ToUpperASCIIImpl<string16>(str); 176 } 177 178 template<class StringType> 179 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, 180 BasicStringPiece<StringType> b) { 181 // Find the first characters that aren't equal and compare them. If the end 182 // of one of the strings is found before a nonequal character, the lengths 183 // of the strings are compared. 184 size_t i = 0; 185 while (i < a.length() && i < b.length()) { 186 typename StringType::value_type lower_a = ToLowerASCII(a[i]); 187 typename StringType::value_type lower_b = ToLowerASCII(b[i]); 188 if (lower_a < lower_b) 189 return -1; 190 if (lower_a > lower_b) 191 return 1; 192 i++; 193 } 194 195 // End of one string hit before finding a different character. Expect the 196 // common case to be "strings equal" at this point so check that first. 197 if (a.length() == b.length()) 198 return 0; 199 200 if (a.length() < b.length()) 201 return -1; 202 return 1; 203 } 204 205 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { 206 return CompareCaseInsensitiveASCIIT<std::string>(a, b); 207 } 208 209 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { 210 return CompareCaseInsensitiveASCIIT<string16>(a, b); 211 } 212 213 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { 214 if (a.length() != b.length()) 215 return false; 216 return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; 217 } 218 219 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { 220 if (a.length() != b.length()) 221 return false; 222 return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; 223 } 224 225 const std::string& EmptyString() { 226 return EmptyStrings::GetInstance()->s; 227 } 228 229 const string16& EmptyString16() { 230 return EmptyStrings::GetInstance()->s16; 231 } 232 233 template<typename STR> 234 bool ReplaceCharsT(const STR& input, 235 const STR& replace_chars, 236 const STR& replace_with, 237 STR* output) { 238 bool removed = false; 239 size_t replace_length = replace_with.length(); 240 241 *output = input; 242 243 size_t found = output->find_first_of(replace_chars); 244 while (found != STR::npos) { 245 removed = true; 246 output->replace(found, 1, replace_with); 247 found = output->find_first_of(replace_chars, found + replace_length); 248 } 249 250 return removed; 251 } 252 253 bool ReplaceChars(const string16& input, 254 const StringPiece16& replace_chars, 255 const string16& replace_with, 256 string16* output) { 257 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 258 } 259 260 bool ReplaceChars(const std::string& input, 261 const StringPiece& replace_chars, 262 const std::string& replace_with, 263 std::string* output) { 264 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 265 } 266 267 bool RemoveChars(const string16& input, 268 const StringPiece16& remove_chars, 269 string16* output) { 270 return ReplaceChars(input, remove_chars.as_string(), string16(), output); 271 } 272 273 bool RemoveChars(const std::string& input, 274 const StringPiece& remove_chars, 275 std::string* output) { 276 return ReplaceChars(input, remove_chars.as_string(), std::string(), output); 277 } 278 279 template<typename Str> 280 TrimPositions TrimStringT(const Str& input, 281 BasicStringPiece<Str> trim_chars, 282 TrimPositions positions, 283 Str* output) { 284 // Find the edges of leading/trailing whitespace as desired. Need to use 285 // a StringPiece version of input to be able to call find* on it with the 286 // StringPiece version of trim_chars (normally the trim_chars will be a 287 // constant so avoid making a copy). 288 BasicStringPiece<Str> input_piece(input); 289 const size_t last_char = input.length() - 1; 290 const size_t first_good_char = (positions & TRIM_LEADING) ? 291 input_piece.find_first_not_of(trim_chars) : 0; 292 const size_t last_good_char = (positions & TRIM_TRAILING) ? 293 input_piece.find_last_not_of(trim_chars) : last_char; 294 295 // When the string was all trimmed, report that we stripped off characters 296 // from whichever position the caller was interested in. For empty input, we 297 // stripped no characters, but we still need to clear |output|. 298 if (input.empty() || 299 (first_good_char == Str::npos) || (last_good_char == Str::npos)) { 300 bool input_was_empty = input.empty(); // in case output == &input 301 output->clear(); 302 return input_was_empty ? TRIM_NONE : positions; 303 } 304 305 // Trim. 306 *output = 307 input.substr(first_good_char, last_good_char - first_good_char + 1); 308 309 // Return where we trimmed from. 310 return static_cast<TrimPositions>( 311 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 312 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 313 } 314 315 bool TrimString(const string16& input, 316 StringPiece16 trim_chars, 317 string16* output) { 318 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 319 } 320 321 bool TrimString(const std::string& input, 322 StringPiece trim_chars, 323 std::string* output) { 324 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 325 } 326 327 template<typename Str> 328 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, 329 BasicStringPiece<Str> trim_chars, 330 TrimPositions positions) { 331 size_t begin = (positions & TRIM_LEADING) ? 332 input.find_first_not_of(trim_chars) : 0; 333 size_t end = (positions & TRIM_TRAILING) ? 334 input.find_last_not_of(trim_chars) + 1 : input.size(); 335 return input.substr(begin, end - begin); 336 } 337 338 StringPiece16 TrimString(StringPiece16 input, 339 const StringPiece16& trim_chars, 340 TrimPositions positions) { 341 return TrimStringPieceT(input, trim_chars, positions); 342 } 343 344 StringPiece TrimString(StringPiece input, 345 const StringPiece& trim_chars, 346 TrimPositions positions) { 347 return TrimStringPieceT(input, trim_chars, positions); 348 } 349 350 void TruncateUTF8ToByteSize(const std::string& input, 351 const size_t byte_size, 352 std::string* output) { 353 DCHECK(output); 354 if (byte_size > input.length()) { 355 *output = input; 356 return; 357 } 358 DCHECK_LE(byte_size, 359 static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); 360 // Note: This cast is necessary because CBU8_NEXT uses int32_ts. 361 int32_t truncation_length = static_cast<int32_t>(byte_size); 362 int32_t char_index = truncation_length - 1; 363 const char* data = input.data(); 364 365 // Using CBU8, we will move backwards from the truncation point 366 // to the beginning of the string looking for a valid UTF8 367 // character. Once a full UTF8 character is found, we will 368 // truncate the string to the end of that character. 369 while (char_index >= 0) { 370 int32_t prev = char_index; 371 base_icu::UChar32 code_point = 0; 372 CBU8_NEXT(data, char_index, truncation_length, code_point); 373 if (!IsValidCharacter(code_point) || 374 !IsValidCodepoint(code_point)) { 375 char_index = prev - 1; 376 } else { 377 break; 378 } 379 } 380 381 if (char_index >= 0 ) 382 *output = input.substr(0, char_index); 383 else 384 output->clear(); 385 } 386 387 TrimPositions TrimWhitespace(const string16& input, 388 TrimPositions positions, 389 string16* output) { 390 return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); 391 } 392 393 StringPiece16 TrimWhitespace(StringPiece16 input, 394 TrimPositions positions) { 395 return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); 396 } 397 398 TrimPositions TrimWhitespaceASCII(const std::string& input, 399 TrimPositions positions, 400 std::string* output) { 401 return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); 402 } 403 404 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { 405 return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); 406 } 407 408 template<typename STR> 409 STR CollapseWhitespaceT(const STR& text, 410 bool trim_sequences_with_line_breaks) { 411 STR result; 412 result.resize(text.size()); 413 414 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 415 // will trim any leading whitespace. 416 bool in_whitespace = true; 417 bool already_trimmed = true; 418 419 int chars_written = 0; 420 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 421 if (IsUnicodeWhitespace(*i)) { 422 if (!in_whitespace) { 423 // Reduce all whitespace sequences to a single space. 424 in_whitespace = true; 425 result[chars_written++] = L' '; 426 } 427 if (trim_sequences_with_line_breaks && !already_trimmed && 428 ((*i == '\n') || (*i == '\r'))) { 429 // Whitespace sequences containing CR or LF are eliminated entirely. 430 already_trimmed = true; 431 --chars_written; 432 } 433 } else { 434 // Non-whitespace chracters are copied straight across. 435 in_whitespace = false; 436 already_trimmed = false; 437 result[chars_written++] = *i; 438 } 439 } 440 441 if (in_whitespace && !already_trimmed) { 442 // Any trailing whitespace is eliminated. 443 --chars_written; 444 } 445 446 result.resize(chars_written); 447 return result; 448 } 449 450 string16 CollapseWhitespace(const string16& text, 451 bool trim_sequences_with_line_breaks) { 452 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 453 } 454 455 std::string CollapseWhitespaceASCII(const std::string& text, 456 bool trim_sequences_with_line_breaks) { 457 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 458 } 459 460 bool ContainsOnlyChars(const StringPiece& input, 461 const StringPiece& characters) { 462 return input.find_first_not_of(characters) == StringPiece::npos; 463 } 464 465 bool ContainsOnlyChars(const StringPiece16& input, 466 const StringPiece16& characters) { 467 return input.find_first_not_of(characters) == StringPiece16::npos; 468 } 469 470 template <class Char> 471 inline bool DoIsStringASCII(const Char* characters, size_t length) { 472 MachineWord all_char_bits = 0; 473 const Char* end = characters + length; 474 475 // Prologue: align the input. 476 while (!IsAlignedToMachineWord(characters) && characters != end) { 477 all_char_bits |= *characters; 478 ++characters; 479 } 480 481 // Compare the values of CPU word size. 482 const Char* word_end = AlignToMachineWord(end); 483 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); 484 while (characters < word_end) { 485 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); 486 characters += loop_increment; 487 } 488 489 // Process the remaining bytes. 490 while (characters != end) { 491 all_char_bits |= *characters; 492 ++characters; 493 } 494 495 MachineWord non_ascii_bit_mask = 496 NonASCIIMask<sizeof(MachineWord), Char>::value(); 497 return !(all_char_bits & non_ascii_bit_mask); 498 } 499 500 bool IsStringASCII(const StringPiece& str) { 501 return DoIsStringASCII(str.data(), str.length()); 502 } 503 504 bool IsStringASCII(const StringPiece16& str) { 505 return DoIsStringASCII(str.data(), str.length()); 506 } 507 508 bool IsStringASCII(const string16& str) { 509 return DoIsStringASCII(str.data(), str.length()); 510 } 511 512 #if defined(WCHAR_T_IS_UTF32) 513 bool IsStringASCII(const std::wstring& str) { 514 return DoIsStringASCII(str.data(), str.length()); 515 } 516 #endif 517 518 bool IsStringUTF8(const StringPiece& str) { 519 const char *src = str.data(); 520 int32_t src_len = static_cast<int32_t>(str.length()); 521 int32_t char_index = 0; 522 523 while (char_index < src_len) { 524 int32_t code_point; 525 CBU8_NEXT(src, char_index, src_len, code_point); 526 if (!IsValidCharacter(code_point)) 527 return false; 528 } 529 return true; 530 } 531 532 // Implementation note: Normally this function will be called with a hardcoded 533 // constant for the lowercase_ascii parameter. Constructing a StringPiece from 534 // a C constant requires running strlen, so the result will be two passes 535 // through the buffers, one to file the length of lowercase_ascii, and one to 536 // compare each letter. 537 // 538 // This function could have taken a const char* to avoid this and only do one 539 // pass through the string. But the strlen is faster than the case-insensitive 540 // compares and lets us early-exit in the case that the strings are different 541 // lengths (will often be the case for non-matches). So whether one approach or 542 // the other will be faster depends on the case. 543 // 544 // The hardcoded strings are typically very short so it doesn't matter, and the 545 // string piece gives additional flexibility for the caller (doesn't have to be 546 // null terminated) so we choose the StringPiece route. 547 template<typename Str> 548 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, 549 StringPiece lowercase_ascii) { 550 if (str.size() != lowercase_ascii.size()) 551 return false; 552 for (size_t i = 0; i < str.size(); i++) { 553 if (ToLowerASCII(str[i]) != lowercase_ascii[i]) 554 return false; 555 } 556 return true; 557 } 558 559 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { 560 return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); 561 } 562 563 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { 564 return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); 565 } 566 567 bool EqualsASCII(StringPiece16 str, StringPiece ascii) { 568 if (str.length() != ascii.length()) 569 return false; 570 return std::equal(ascii.begin(), ascii.end(), str.begin()); 571 } 572 573 template<typename Str> 574 bool StartsWithT(BasicStringPiece<Str> str, 575 BasicStringPiece<Str> search_for, 576 CompareCase case_sensitivity) { 577 if (search_for.size() > str.size()) 578 return false; 579 580 BasicStringPiece<Str> source = str.substr(0, search_for.size()); 581 582 switch (case_sensitivity) { 583 case CompareCase::SENSITIVE: 584 return source == search_for; 585 586 case CompareCase::INSENSITIVE_ASCII: 587 return std::equal( 588 search_for.begin(), search_for.end(), 589 source.begin(), 590 CaseInsensitiveCompareASCII<typename Str::value_type>()); 591 592 default: 593 NOTREACHED(); 594 return false; 595 } 596 } 597 598 bool StartsWith(StringPiece str, 599 StringPiece search_for, 600 CompareCase case_sensitivity) { 601 return StartsWithT<std::string>(str, search_for, case_sensitivity); 602 } 603 604 bool StartsWith(StringPiece16 str, 605 StringPiece16 search_for, 606 CompareCase case_sensitivity) { 607 return StartsWithT<string16>(str, search_for, case_sensitivity); 608 } 609 610 template <typename Str> 611 bool EndsWithT(BasicStringPiece<Str> str, 612 BasicStringPiece<Str> search_for, 613 CompareCase case_sensitivity) { 614 if (search_for.size() > str.size()) 615 return false; 616 617 BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), 618 search_for.size()); 619 620 switch (case_sensitivity) { 621 case CompareCase::SENSITIVE: 622 return source == search_for; 623 624 case CompareCase::INSENSITIVE_ASCII: 625 return std::equal( 626 source.begin(), source.end(), 627 search_for.begin(), 628 CaseInsensitiveCompareASCII<typename Str::value_type>()); 629 630 default: 631 NOTREACHED(); 632 return false; 633 } 634 } 635 636 bool EndsWith(StringPiece str, 637 StringPiece search_for, 638 CompareCase case_sensitivity) { 639 return EndsWithT<std::string>(str, search_for, case_sensitivity); 640 } 641 642 bool EndsWith(StringPiece16 str, 643 StringPiece16 search_for, 644 CompareCase case_sensitivity) { 645 return EndsWithT<string16>(str, search_for, case_sensitivity); 646 } 647 648 char HexDigitToInt(wchar_t c) { 649 DCHECK(IsHexDigit(c)); 650 if (c >= '0' && c <= '9') 651 return static_cast<char>(c - '0'); 652 if (c >= 'A' && c <= 'F') 653 return static_cast<char>(c - 'A' + 10); 654 if (c >= 'a' && c <= 'f') 655 return static_cast<char>(c - 'a' + 10); 656 return 0; 657 } 658 659 bool IsUnicodeWhitespace(wchar_t c) { 660 // kWhitespaceWide is a NULL-terminated string 661 for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) { 662 if (*cur == c) 663 return true; 664 } 665 return false; 666 } 667 668 static const char* const kByteStringsUnlocalized[] = { 669 " B", 670 " kB", 671 " MB", 672 " GB", 673 " TB", 674 " PB" 675 }; 676 677 string16 FormatBytesUnlocalized(int64_t bytes) { 678 double unit_amount = static_cast<double>(bytes); 679 size_t dimension = 0; 680 const int kKilo = 1024; 681 while (unit_amount >= kKilo && 682 dimension < arraysize(kByteStringsUnlocalized) - 1) { 683 unit_amount /= kKilo; 684 dimension++; 685 } 686 687 char buf[64]; 688 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 689 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 690 kByteStringsUnlocalized[dimension]); 691 } else { 692 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 693 kByteStringsUnlocalized[dimension]); 694 } 695 696 return ASCIIToUTF16(buf); 697 } 698 699 // Runs in O(n) time in the length of |str|. 700 template<class StringType> 701 void DoReplaceSubstringsAfterOffset(StringType* str, 702 size_t offset, 703 BasicStringPiece<StringType> find_this, 704 BasicStringPiece<StringType> replace_with, 705 bool replace_all) { 706 DCHECK(!find_this.empty()); 707 708 // If the find string doesn't appear, there's nothing to do. 709 offset = str->find(find_this.data(), offset, find_this.size()); 710 if (offset == StringType::npos) 711 return; 712 713 // If we're only replacing one instance, there's no need to do anything 714 // complicated. 715 size_t find_length = find_this.length(); 716 if (!replace_all) { 717 str->replace(offset, find_length, replace_with.data(), replace_with.size()); 718 return; 719 } 720 721 // If the find and replace strings are the same length, we can simply use 722 // replace() on each instance, and finish the entire operation in O(n) time. 723 size_t replace_length = replace_with.length(); 724 if (find_length == replace_length) { 725 do { 726 str->replace(offset, find_length, 727 replace_with.data(), replace_with.size()); 728 offset = str->find(find_this.data(), offset + replace_length, 729 find_this.size()); 730 } while (offset != StringType::npos); 731 return; 732 } 733 734 // Since the find and replace strings aren't the same length, a loop like the 735 // one above would be O(n^2) in the worst case, as replace() will shift the 736 // entire remaining string each time. We need to be more clever to keep 737 // things O(n). 738 // 739 // If we're shortening the string, we can alternate replacements with shifting 740 // forward the intervening characters using memmove(). 741 size_t str_length = str->length(); 742 if (find_length > replace_length) { 743 size_t write_offset = offset; 744 do { 745 if (replace_length) { 746 str->replace(write_offset, replace_length, 747 replace_with.data(), replace_with.size()); 748 write_offset += replace_length; 749 } 750 size_t read_offset = offset + find_length; 751 offset = std::min( 752 str->find(find_this.data(), read_offset, find_this.size()), 753 str_length); 754 size_t length = offset - read_offset; 755 if (length) { 756 memmove(&(*str)[write_offset], &(*str)[read_offset], 757 length * sizeof(typename StringType::value_type)); 758 write_offset += length; 759 } 760 } while (offset < str_length); 761 str->resize(write_offset); 762 return; 763 } 764 765 // We're lengthening the string. We can use alternating replacements and 766 // memmove() calls like above, but we need to precalculate the final string 767 // length and then expand from back-to-front to avoid overwriting the string 768 // as we're reading it, needing to shift, or having to copy to a second string 769 // temporarily. 770 size_t first_match = offset; 771 772 // First, calculate the final length and resize the string. 773 size_t final_length = str_length; 774 size_t expansion = replace_length - find_length; 775 size_t current_match; 776 do { 777 final_length += expansion; 778 // Minor optimization: save this offset into |current_match|, so that on 779 // exit from the loop, |current_match| will point at the last instance of 780 // the find string, and we won't need to find() it again immediately. 781 current_match = offset; 782 offset = str->find(find_this.data(), offset + find_length, 783 find_this.size()); 784 } while (offset != StringType::npos); 785 str->resize(final_length); 786 787 // Now do the replacement loop, working backwards through the string. 788 for (size_t prev_match = str_length, write_offset = final_length; ; 789 current_match = str->rfind(find_this.data(), current_match - 1, 790 find_this.size())) { 791 size_t read_offset = current_match + find_length; 792 size_t length = prev_match - read_offset; 793 if (length) { 794 write_offset -= length; 795 memmove(&(*str)[write_offset], &(*str)[read_offset], 796 length * sizeof(typename StringType::value_type)); 797 } 798 write_offset -= replace_length; 799 str->replace(write_offset, replace_length, 800 replace_with.data(), replace_with.size()); 801 if (current_match == first_match) 802 return; 803 prev_match = current_match; 804 } 805 } 806 807 void ReplaceFirstSubstringAfterOffset(string16* str, 808 size_t start_offset, 809 StringPiece16 find_this, 810 StringPiece16 replace_with) { 811 DoReplaceSubstringsAfterOffset<string16>( 812 str, start_offset, find_this, replace_with, false); // Replace first. 813 } 814 815 void ReplaceFirstSubstringAfterOffset(std::string* str, 816 size_t start_offset, 817 StringPiece find_this, 818 StringPiece replace_with) { 819 DoReplaceSubstringsAfterOffset<std::string>( 820 str, start_offset, find_this, replace_with, false); // Replace first. 821 } 822 823 void ReplaceSubstringsAfterOffset(string16* str, 824 size_t start_offset, 825 StringPiece16 find_this, 826 StringPiece16 replace_with) { 827 DoReplaceSubstringsAfterOffset<string16>( 828 str, start_offset, find_this, replace_with, true); // Replace all. 829 } 830 831 void ReplaceSubstringsAfterOffset(std::string* str, 832 size_t start_offset, 833 StringPiece find_this, 834 StringPiece replace_with) { 835 DoReplaceSubstringsAfterOffset<std::string>( 836 str, start_offset, find_this, replace_with, true); // Replace all. 837 } 838 839 template <class string_type> 840 inline typename string_type::value_type* WriteIntoT(string_type* str, 841 size_t length_with_null) { 842 DCHECK_GT(length_with_null, 1u); 843 str->reserve(length_with_null); 844 str->resize(length_with_null - 1); 845 return &((*str)[0]); 846 } 847 848 char* WriteInto(std::string* str, size_t length_with_null) { 849 return WriteIntoT(str, length_with_null); 850 } 851 852 char16* WriteInto(string16* str, size_t length_with_null) { 853 return WriteIntoT(str, length_with_null); 854 } 855 856 template<typename STR> 857 static STR JoinStringT(const std::vector<STR>& parts, 858 BasicStringPiece<STR> sep) { 859 if (parts.empty()) 860 return STR(); 861 862 STR result(parts[0]); 863 auto iter = parts.begin(); 864 ++iter; 865 866 for (; iter != parts.end(); ++iter) { 867 sep.AppendToString(&result); 868 result += *iter; 869 } 870 871 return result; 872 } 873 874 std::string JoinString(const std::vector<std::string>& parts, 875 StringPiece separator) { 876 return JoinStringT(parts, separator); 877 } 878 879 string16 JoinString(const std::vector<string16>& parts, 880 StringPiece16 separator) { 881 return JoinStringT(parts, separator); 882 } 883 884 template<class FormatStringType, class OutStringType> 885 OutStringType DoReplaceStringPlaceholders( 886 const FormatStringType& format_string, 887 const std::vector<OutStringType>& subst, 888 std::vector<size_t>* offsets) { 889 size_t substitutions = subst.size(); 890 DCHECK_LT(substitutions, 10U); 891 892 size_t sub_length = 0; 893 for (const auto& cur : subst) 894 sub_length += cur.length(); 895 896 OutStringType formatted; 897 formatted.reserve(format_string.length() + sub_length); 898 899 std::vector<ReplacementOffset> r_offsets; 900 for (auto i = format_string.begin(); i != format_string.end(); ++i) { 901 if ('$' == *i) { 902 if (i + 1 != format_string.end()) { 903 ++i; 904 if ('$' == *i) { 905 while (i != format_string.end() && '$' == *i) { 906 formatted.push_back('$'); 907 ++i; 908 } 909 --i; 910 } else { 911 if (*i < '1' || *i > '9') { 912 DLOG(ERROR) << "Invalid placeholder: $" << *i; 913 continue; 914 } 915 uintptr_t index = *i - '1'; 916 if (offsets) { 917 ReplacementOffset r_offset(index, 918 static_cast<int>(formatted.size())); 919 r_offsets.insert(std::lower_bound(r_offsets.begin(), 920 r_offsets.end(), 921 r_offset, 922 &CompareParameter), 923 r_offset); 924 } 925 if (index < substitutions) 926 formatted.append(subst.at(index)); 927 } 928 } 929 } else { 930 formatted.push_back(*i); 931 } 932 } 933 if (offsets) { 934 for (const auto& cur : r_offsets) 935 offsets->push_back(cur.offset); 936 } 937 return formatted; 938 } 939 940 string16 ReplaceStringPlaceholders(const string16& format_string, 941 const std::vector<string16>& subst, 942 std::vector<size_t>* offsets) { 943 return DoReplaceStringPlaceholders(format_string, subst, offsets); 944 } 945 946 std::string ReplaceStringPlaceholders(const StringPiece& format_string, 947 const std::vector<std::string>& subst, 948 std::vector<size_t>* offsets) { 949 return DoReplaceStringPlaceholders(format_string, subst, offsets); 950 } 951 952 string16 ReplaceStringPlaceholders(const string16& format_string, 953 const string16& a, 954 size_t* offset) { 955 std::vector<size_t> offsets; 956 std::vector<string16> subst; 957 subst.push_back(a); 958 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 959 960 DCHECK_EQ(1U, offsets.size()); 961 if (offset) 962 *offset = offsets[0]; 963 return result; 964 } 965 966 // The following code is compatible with the OpenBSD lcpy interface. See: 967 // http://www.gratisoft.us/todd/papers/strlcpy.html 968 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 969 970 namespace { 971 972 template <typename CHAR> 973 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 974 for (size_t i = 0; i < dst_size; ++i) { 975 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 976 return i; 977 } 978 979 // We were left off at dst_size. We over copied 1 byte. Null terminate. 980 if (dst_size != 0) 981 dst[dst_size - 1] = 0; 982 983 // Count the rest of the |src|, and return it's length in characters. 984 while (src[dst_size]) ++dst_size; 985 return dst_size; 986 } 987 988 } // namespace 989 990 size_t strlcpy(char* dst, const char* src, size_t dst_size) { 991 return lcpyT<char>(dst, src, dst_size); 992 } 993 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 994 return lcpyT<wchar_t>(dst, src, dst_size); 995 } 996 997 } // namespace base 998