1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <math.h> 10 #include <stdarg.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <time.h> 16 #include <wchar.h> 17 #include <wctype.h> 18 19 #include <algorithm> 20 #include <limits> 21 #include <vector> 22 23 #include "base/logging.h" 24 #include "base/macros.h" 25 #include "base/memory/singleton.h" 26 #include "base/strings/string_split.h" 27 #include "base/strings/utf_string_conversion_utils.h" 28 #include "base/strings/utf_string_conversions.h" 29 #include "base/third_party/icu/icu_utf.h" 30 #include "build/build_config.h" 31 32 namespace base { 33 34 namespace { 35 36 // Force the singleton used by EmptyString[16] to be a unique type. This 37 // prevents other code that might accidentally use Singleton<string> from 38 // getting our internal one. 39 struct EmptyStrings { 40 EmptyStrings() {} 41 const std::string s; 42 const string16 s16; 43 44 static EmptyStrings* GetInstance() { 45 return Singleton<EmptyStrings>::get(); 46 } 47 }; 48 49 // Used by ReplaceStringPlaceholders to track the position in the string of 50 // replaced parameters. 51 struct ReplacementOffset { 52 ReplacementOffset(uintptr_t parameter, size_t offset) 53 : parameter(parameter), 54 offset(offset) {} 55 56 // Index of the parameter. 57 uintptr_t parameter; 58 59 // Starting position in the string. 60 size_t offset; 61 }; 62 63 static bool CompareParameter(const ReplacementOffset& elem1, 64 const ReplacementOffset& elem2) { 65 return elem1.parameter < elem2.parameter; 66 } 67 68 // Assuming that a pointer is the size of a "machine word", then 69 // uintptr_t is an integer type that is also a machine word. 70 typedef uintptr_t MachineWord; 71 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; 72 73 inline bool IsAlignedToMachineWord(const void* pointer) { 74 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); 75 } 76 77 template<typename T> inline T* AlignToMachineWord(T* pointer) { 78 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & 79 ~kMachineWordAlignmentMask); 80 } 81 82 template<size_t size, typename CharacterType> struct NonASCIIMask; 83 template<> struct NonASCIIMask<4, char16> { 84 static inline uint32_t value() { return 0xFF80FF80U; } 85 }; 86 template<> struct NonASCIIMask<4, char> { 87 static inline uint32_t value() { return 0x80808080U; } 88 }; 89 template<> struct NonASCIIMask<8, char16> { 90 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } 91 }; 92 template<> struct NonASCIIMask<8, char> { 93 static inline uint64_t value() { return 0x8080808080808080ULL; } 94 }; 95 #if defined(WCHAR_T_IS_UTF32) 96 template<> struct NonASCIIMask<4, wchar_t> { 97 static inline uint32_t value() { return 0xFFFFFF80U; } 98 }; 99 template<> struct NonASCIIMask<8, wchar_t> { 100 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; } 101 }; 102 #endif // WCHAR_T_IS_UTF32 103 104 } // namespace 105 106 bool IsWprintfFormatPortable(const wchar_t* format) { 107 for (const wchar_t* position = format; *position != '\0'; ++position) { 108 if (*position == '%') { 109 bool in_specification = true; 110 bool modifier_l = false; 111 while (in_specification) { 112 // Eat up characters until reaching a known specifier. 113 if (*++position == '\0') { 114 // The format string ended in the middle of a specification. Call 115 // it portable because no unportable specifications were found. The 116 // string is equally broken on all platforms. 117 return true; 118 } 119 120 if (*position == 'l') { 121 // 'l' is the only thing that can save the 's' and 'c' specifiers. 122 modifier_l = true; 123 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 124 *position == 'S' || *position == 'C' || *position == 'F' || 125 *position == 'D' || *position == 'O' || *position == 'U') { 126 // Not portable. 127 return false; 128 } 129 130 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 131 // Portable, keep scanning the rest of the format string. 132 in_specification = false; 133 } 134 } 135 } 136 } 137 138 return true; 139 } 140 141 namespace { 142 143 template<typename StringType> 144 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { 145 StringType ret; 146 ret.reserve(str.size()); 147 for (size_t i = 0; i < str.size(); i++) 148 ret.push_back(ToLowerASCII(str[i])); 149 return ret; 150 } 151 152 template<typename StringType> 153 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { 154 StringType ret; 155 ret.reserve(str.size()); 156 for (size_t i = 0; i < str.size(); i++) 157 ret.push_back(ToUpperASCII(str[i])); 158 return ret; 159 } 160 161 } // namespace 162 163 std::string ToLowerASCII(StringPiece str) { 164 return ToLowerASCIIImpl<std::string>(str); 165 } 166 167 string16 ToLowerASCII(StringPiece16 str) { 168 return ToLowerASCIIImpl<string16>(str); 169 } 170 171 std::string ToUpperASCII(StringPiece str) { 172 return ToUpperASCIIImpl<std::string>(str); 173 } 174 175 string16 ToUpperASCII(StringPiece16 str) { 176 return ToUpperASCIIImpl<string16>(str); 177 } 178 179 template<class StringType> 180 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, 181 BasicStringPiece<StringType> b) { 182 // Find the first characters that aren't equal and compare them. If the end 183 // of one of the strings is found before a nonequal character, the lengths 184 // of the strings are compared. 185 size_t i = 0; 186 while (i < a.length() && i < b.length()) { 187 typename StringType::value_type lower_a = ToLowerASCII(a[i]); 188 typename StringType::value_type lower_b = ToLowerASCII(b[i]); 189 if (lower_a < lower_b) 190 return -1; 191 if (lower_a > lower_b) 192 return 1; 193 i++; 194 } 195 196 // End of one string hit before finding a different character. Expect the 197 // common case to be "strings equal" at this point so check that first. 198 if (a.length() == b.length()) 199 return 0; 200 201 if (a.length() < b.length()) 202 return -1; 203 return 1; 204 } 205 206 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { 207 return CompareCaseInsensitiveASCIIT<std::string>(a, b); 208 } 209 210 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { 211 return CompareCaseInsensitiveASCIIT<string16>(a, b); 212 } 213 214 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { 215 if (a.length() != b.length()) 216 return false; 217 return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; 218 } 219 220 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { 221 if (a.length() != b.length()) 222 return false; 223 return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; 224 } 225 226 const std::string& EmptyString() { 227 return EmptyStrings::GetInstance()->s; 228 } 229 230 const string16& EmptyString16() { 231 return EmptyStrings::GetInstance()->s16; 232 } 233 234 template<typename STR> 235 bool ReplaceCharsT(const STR& input, 236 const STR& replace_chars, 237 const STR& replace_with, 238 STR* output) { 239 bool removed = false; 240 size_t replace_length = replace_with.length(); 241 242 *output = input; 243 244 size_t found = output->find_first_of(replace_chars); 245 while (found != STR::npos) { 246 removed = true; 247 output->replace(found, 1, replace_with); 248 found = output->find_first_of(replace_chars, found + replace_length); 249 } 250 251 return removed; 252 } 253 254 bool ReplaceChars(const string16& input, 255 const StringPiece16& replace_chars, 256 const string16& replace_with, 257 string16* output) { 258 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 259 } 260 261 bool ReplaceChars(const std::string& input, 262 const StringPiece& replace_chars, 263 const std::string& replace_with, 264 std::string* output) { 265 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 266 } 267 268 bool RemoveChars(const string16& input, 269 const StringPiece16& remove_chars, 270 string16* output) { 271 return ReplaceChars(input, remove_chars.as_string(), string16(), output); 272 } 273 274 bool RemoveChars(const std::string& input, 275 const StringPiece& remove_chars, 276 std::string* output) { 277 return ReplaceChars(input, remove_chars.as_string(), std::string(), output); 278 } 279 280 template<typename Str> 281 TrimPositions TrimStringT(const Str& input, 282 BasicStringPiece<Str> trim_chars, 283 TrimPositions positions, 284 Str* output) { 285 // Find the edges of leading/trailing whitespace as desired. Need to use 286 // a StringPiece version of input to be able to call find* on it with the 287 // StringPiece version of trim_chars (normally the trim_chars will be a 288 // constant so avoid making a copy). 289 BasicStringPiece<Str> input_piece(input); 290 const size_t last_char = input.length() - 1; 291 const size_t first_good_char = (positions & TRIM_LEADING) ? 292 input_piece.find_first_not_of(trim_chars) : 0; 293 const size_t last_good_char = (positions & TRIM_TRAILING) ? 294 input_piece.find_last_not_of(trim_chars) : last_char; 295 296 // When the string was all trimmed, report that we stripped off characters 297 // from whichever position the caller was interested in. For empty input, we 298 // stripped no characters, but we still need to clear |output|. 299 if (input.empty() || 300 (first_good_char == Str::npos) || (last_good_char == Str::npos)) { 301 bool input_was_empty = input.empty(); // in case output == &input 302 output->clear(); 303 return input_was_empty ? TRIM_NONE : positions; 304 } 305 306 // Trim. 307 *output = 308 input.substr(first_good_char, last_good_char - first_good_char + 1); 309 310 // Return where we trimmed from. 311 return static_cast<TrimPositions>( 312 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 313 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 314 } 315 316 bool TrimString(const string16& input, 317 StringPiece16 trim_chars, 318 string16* output) { 319 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 320 } 321 322 bool TrimString(const std::string& input, 323 StringPiece trim_chars, 324 std::string* output) { 325 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 326 } 327 328 template<typename Str> 329 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, 330 BasicStringPiece<Str> trim_chars, 331 TrimPositions positions) { 332 size_t begin = (positions & TRIM_LEADING) ? 333 input.find_first_not_of(trim_chars) : 0; 334 size_t end = (positions & TRIM_TRAILING) ? 335 input.find_last_not_of(trim_chars) + 1 : input.size(); 336 return input.substr(begin, end - begin); 337 } 338 339 StringPiece16 TrimString(StringPiece16 input, 340 const StringPiece16& trim_chars, 341 TrimPositions positions) { 342 return TrimStringPieceT(input, trim_chars, positions); 343 } 344 345 StringPiece TrimString(StringPiece input, 346 const StringPiece& trim_chars, 347 TrimPositions positions) { 348 return TrimStringPieceT(input, trim_chars, positions); 349 } 350 351 void TruncateUTF8ToByteSize(const std::string& input, 352 const size_t byte_size, 353 std::string* output) { 354 DCHECK(output); 355 if (byte_size > input.length()) { 356 *output = input; 357 return; 358 } 359 DCHECK_LE(byte_size, 360 static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); 361 // Note: This cast is necessary because CBU8_NEXT uses int32_ts. 362 int32_t truncation_length = static_cast<int32_t>(byte_size); 363 int32_t char_index = truncation_length - 1; 364 const char* data = input.data(); 365 366 // Using CBU8, we will move backwards from the truncation point 367 // to the beginning of the string looking for a valid UTF8 368 // character. Once a full UTF8 character is found, we will 369 // truncate the string to the end of that character. 370 while (char_index >= 0) { 371 int32_t prev = char_index; 372 base_icu::UChar32 code_point = 0; 373 CBU8_NEXT(data, char_index, truncation_length, code_point); 374 if (!IsValidCharacter(code_point) || 375 !IsValidCodepoint(code_point)) { 376 char_index = prev - 1; 377 } else { 378 break; 379 } 380 } 381 382 if (char_index >= 0 ) 383 *output = input.substr(0, char_index); 384 else 385 output->clear(); 386 } 387 388 TrimPositions TrimWhitespace(const string16& input, 389 TrimPositions positions, 390 string16* output) { 391 return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); 392 } 393 394 StringPiece16 TrimWhitespace(StringPiece16 input, 395 TrimPositions positions) { 396 return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); 397 } 398 399 TrimPositions TrimWhitespaceASCII(const std::string& input, 400 TrimPositions positions, 401 std::string* output) { 402 return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); 403 } 404 405 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { 406 return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); 407 } 408 409 template<typename STR> 410 STR CollapseWhitespaceT(const STR& text, 411 bool trim_sequences_with_line_breaks) { 412 STR result; 413 result.resize(text.size()); 414 415 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 416 // will trim any leading whitespace. 417 bool in_whitespace = true; 418 bool already_trimmed = true; 419 420 int chars_written = 0; 421 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 422 if (IsUnicodeWhitespace(*i)) { 423 if (!in_whitespace) { 424 // Reduce all whitespace sequences to a single space. 425 in_whitespace = true; 426 result[chars_written++] = L' '; 427 } 428 if (trim_sequences_with_line_breaks && !already_trimmed && 429 ((*i == '\n') || (*i == '\r'))) { 430 // Whitespace sequences containing CR or LF are eliminated entirely. 431 already_trimmed = true; 432 --chars_written; 433 } 434 } else { 435 // Non-whitespace chracters are copied straight across. 436 in_whitespace = false; 437 already_trimmed = false; 438 result[chars_written++] = *i; 439 } 440 } 441 442 if (in_whitespace && !already_trimmed) { 443 // Any trailing whitespace is eliminated. 444 --chars_written; 445 } 446 447 result.resize(chars_written); 448 return result; 449 } 450 451 string16 CollapseWhitespace(const string16& text, 452 bool trim_sequences_with_line_breaks) { 453 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 454 } 455 456 std::string CollapseWhitespaceASCII(const std::string& text, 457 bool trim_sequences_with_line_breaks) { 458 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 459 } 460 461 bool ContainsOnlyChars(const StringPiece& input, 462 const StringPiece& characters) { 463 return input.find_first_not_of(characters) == StringPiece::npos; 464 } 465 466 bool ContainsOnlyChars(const StringPiece16& input, 467 const StringPiece16& characters) { 468 return input.find_first_not_of(characters) == StringPiece16::npos; 469 } 470 471 template <class Char> 472 inline bool DoIsStringASCII(const Char* characters, size_t length) { 473 MachineWord all_char_bits = 0; 474 const Char* end = characters + length; 475 476 // Prologue: align the input. 477 while (!IsAlignedToMachineWord(characters) && characters != end) { 478 all_char_bits |= *characters; 479 ++characters; 480 } 481 482 // Compare the values of CPU word size. 483 const Char* word_end = AlignToMachineWord(end); 484 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); 485 while (characters < word_end) { 486 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); 487 characters += loop_increment; 488 } 489 490 // Process the remaining bytes. 491 while (characters != end) { 492 all_char_bits |= *characters; 493 ++characters; 494 } 495 496 MachineWord non_ascii_bit_mask = 497 NonASCIIMask<sizeof(MachineWord), Char>::value(); 498 return !(all_char_bits & non_ascii_bit_mask); 499 } 500 501 bool IsStringASCII(const StringPiece& str) { 502 return DoIsStringASCII(str.data(), str.length()); 503 } 504 505 bool IsStringASCII(const StringPiece16& str) { 506 return DoIsStringASCII(str.data(), str.length()); 507 } 508 509 bool IsStringASCII(const string16& str) { 510 return DoIsStringASCII(str.data(), str.length()); 511 } 512 513 #if defined(WCHAR_T_IS_UTF32) 514 bool IsStringASCII(const std::wstring& str) { 515 return DoIsStringASCII(str.data(), str.length()); 516 } 517 #endif 518 519 bool IsStringUTF8(const StringPiece& str) { 520 const char *src = str.data(); 521 int32_t src_len = static_cast<int32_t>(str.length()); 522 int32_t char_index = 0; 523 524 while (char_index < src_len) { 525 int32_t code_point; 526 CBU8_NEXT(src, char_index, src_len, code_point); 527 if (!IsValidCharacter(code_point)) 528 return false; 529 } 530 return true; 531 } 532 533 // Implementation note: Normally this function will be called with a hardcoded 534 // constant for the lowercase_ascii parameter. Constructing a StringPiece from 535 // a C constant requires running strlen, so the result will be two passes 536 // through the buffers, one to file the length of lowercase_ascii, and one to 537 // compare each letter. 538 // 539 // This function could have taken a const char* to avoid this and only do one 540 // pass through the string. But the strlen is faster than the case-insensitive 541 // compares and lets us early-exit in the case that the strings are different 542 // lengths (will often be the case for non-matches). So whether one approach or 543 // the other will be faster depends on the case. 544 // 545 // The hardcoded strings are typically very short so it doesn't matter, and the 546 // string piece gives additional flexibility for the caller (doesn't have to be 547 // null terminated) so we choose the StringPiece route. 548 template<typename Str> 549 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, 550 StringPiece lowercase_ascii) { 551 if (str.size() != lowercase_ascii.size()) 552 return false; 553 for (size_t i = 0; i < str.size(); i++) { 554 if (ToLowerASCII(str[i]) != lowercase_ascii[i]) 555 return false; 556 } 557 return true; 558 } 559 560 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { 561 return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); 562 } 563 564 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { 565 return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); 566 } 567 568 bool EqualsASCII(StringPiece16 str, StringPiece ascii) { 569 if (str.length() != ascii.length()) 570 return false; 571 return std::equal(ascii.begin(), ascii.end(), str.begin()); 572 } 573 574 template<typename Str> 575 bool StartsWithT(BasicStringPiece<Str> str, 576 BasicStringPiece<Str> search_for, 577 CompareCase case_sensitivity) { 578 if (search_for.size() > str.size()) 579 return false; 580 581 BasicStringPiece<Str> source = str.substr(0, search_for.size()); 582 583 switch (case_sensitivity) { 584 case CompareCase::SENSITIVE: 585 return source == search_for; 586 587 case CompareCase::INSENSITIVE_ASCII: 588 return std::equal( 589 search_for.begin(), search_for.end(), 590 source.begin(), 591 CaseInsensitiveCompareASCII<typename Str::value_type>()); 592 593 default: 594 NOTREACHED(); 595 return false; 596 } 597 } 598 599 bool StartsWith(StringPiece str, 600 StringPiece search_for, 601 CompareCase case_sensitivity) { 602 return StartsWithT<std::string>(str, search_for, case_sensitivity); 603 } 604 605 bool StartsWith(StringPiece16 str, 606 StringPiece16 search_for, 607 CompareCase case_sensitivity) { 608 return StartsWithT<string16>(str, search_for, case_sensitivity); 609 } 610 611 template <typename Str> 612 bool EndsWithT(BasicStringPiece<Str> str, 613 BasicStringPiece<Str> search_for, 614 CompareCase case_sensitivity) { 615 if (search_for.size() > str.size()) 616 return false; 617 618 BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), 619 search_for.size()); 620 621 switch (case_sensitivity) { 622 case CompareCase::SENSITIVE: 623 return source == search_for; 624 625 case CompareCase::INSENSITIVE_ASCII: 626 return std::equal( 627 source.begin(), source.end(), 628 search_for.begin(), 629 CaseInsensitiveCompareASCII<typename Str::value_type>()); 630 631 default: 632 NOTREACHED(); 633 return false; 634 } 635 } 636 637 bool EndsWith(StringPiece str, 638 StringPiece search_for, 639 CompareCase case_sensitivity) { 640 return EndsWithT<std::string>(str, search_for, case_sensitivity); 641 } 642 643 bool EndsWith(StringPiece16 str, 644 StringPiece16 search_for, 645 CompareCase case_sensitivity) { 646 return EndsWithT<string16>(str, search_for, case_sensitivity); 647 } 648 649 char HexDigitToInt(wchar_t c) { 650 DCHECK(IsHexDigit(c)); 651 if (c >= '0' && c <= '9') 652 return static_cast<char>(c - '0'); 653 if (c >= 'A' && c <= 'F') 654 return static_cast<char>(c - 'A' + 10); 655 if (c >= 'a' && c <= 'f') 656 return static_cast<char>(c - 'a' + 10); 657 return 0; 658 } 659 660 bool IsUnicodeWhitespace(wchar_t c) { 661 // kWhitespaceWide is a NULL-terminated string 662 for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) { 663 if (*cur == c) 664 return true; 665 } 666 return false; 667 } 668 669 static const char* const kByteStringsUnlocalized[] = { 670 " B", 671 " kB", 672 " MB", 673 " GB", 674 " TB", 675 " PB" 676 }; 677 678 string16 FormatBytesUnlocalized(int64_t bytes) { 679 double unit_amount = static_cast<double>(bytes); 680 size_t dimension = 0; 681 const int kKilo = 1024; 682 while (unit_amount >= kKilo && 683 dimension < arraysize(kByteStringsUnlocalized) - 1) { 684 unit_amount /= kKilo; 685 dimension++; 686 } 687 688 char buf[64]; 689 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 690 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 691 kByteStringsUnlocalized[dimension]); 692 } else { 693 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 694 kByteStringsUnlocalized[dimension]); 695 } 696 697 return ASCIIToUTF16(buf); 698 } 699 700 // Runs in O(n) time in the length of |str|. 701 template<class StringType> 702 void DoReplaceSubstringsAfterOffset(StringType* str, 703 size_t offset, 704 BasicStringPiece<StringType> find_this, 705 BasicStringPiece<StringType> replace_with, 706 bool replace_all) { 707 DCHECK(!find_this.empty()); 708 709 // If the find string doesn't appear, there's nothing to do. 710 offset = str->find(find_this.data(), offset, find_this.size()); 711 if (offset == StringType::npos) 712 return; 713 714 // If we're only replacing one instance, there's no need to do anything 715 // complicated. 716 size_t find_length = find_this.length(); 717 if (!replace_all) { 718 str->replace(offset, find_length, replace_with.data(), replace_with.size()); 719 return; 720 } 721 722 // If the find and replace strings are the same length, we can simply use 723 // replace() on each instance, and finish the entire operation in O(n) time. 724 size_t replace_length = replace_with.length(); 725 if (find_length == replace_length) { 726 do { 727 str->replace(offset, find_length, 728 replace_with.data(), replace_with.size()); 729 offset = str->find(find_this.data(), offset + replace_length, 730 find_this.size()); 731 } while (offset != StringType::npos); 732 return; 733 } 734 735 // Since the find and replace strings aren't the same length, a loop like the 736 // one above would be O(n^2) in the worst case, as replace() will shift the 737 // entire remaining string each time. We need to be more clever to keep 738 // things O(n). 739 // 740 // If we're shortening the string, we can alternate replacements with shifting 741 // forward the intervening characters using memmove(). 742 size_t str_length = str->length(); 743 if (find_length > replace_length) { 744 size_t write_offset = offset; 745 do { 746 if (replace_length) { 747 str->replace(write_offset, replace_length, 748 replace_with.data(), replace_with.size()); 749 write_offset += replace_length; 750 } 751 size_t read_offset = offset + find_length; 752 offset = std::min( 753 str->find(find_this.data(), read_offset, find_this.size()), 754 str_length); 755 size_t length = offset - read_offset; 756 if (length) { 757 memmove(&(*str)[write_offset], &(*str)[read_offset], 758 length * sizeof(typename StringType::value_type)); 759 write_offset += length; 760 } 761 } while (offset < str_length); 762 str->resize(write_offset); 763 return; 764 } 765 766 // We're lengthening the string. We can use alternating replacements and 767 // memmove() calls like above, but we need to precalculate the final string 768 // length and then expand from back-to-front to avoid overwriting the string 769 // as we're reading it, needing to shift, or having to copy to a second string 770 // temporarily. 771 size_t first_match = offset; 772 773 // First, calculate the final length and resize the string. 774 size_t final_length = str_length; 775 size_t expansion = replace_length - find_length; 776 size_t current_match; 777 do { 778 final_length += expansion; 779 // Minor optimization: save this offset into |current_match|, so that on 780 // exit from the loop, |current_match| will point at the last instance of 781 // the find string, and we won't need to find() it again immediately. 782 current_match = offset; 783 offset = str->find(find_this.data(), offset + find_length, 784 find_this.size()); 785 } while (offset != StringType::npos); 786 str->resize(final_length); 787 788 // Now do the replacement loop, working backwards through the string. 789 for (size_t prev_match = str_length, write_offset = final_length; ; 790 current_match = str->rfind(find_this.data(), current_match - 1, 791 find_this.size())) { 792 size_t read_offset = current_match + find_length; 793 size_t length = prev_match - read_offset; 794 if (length) { 795 write_offset -= length; 796 memmove(&(*str)[write_offset], &(*str)[read_offset], 797 length * sizeof(typename StringType::value_type)); 798 } 799 write_offset -= replace_length; 800 str->replace(write_offset, replace_length, 801 replace_with.data(), replace_with.size()); 802 if (current_match == first_match) 803 return; 804 prev_match = current_match; 805 } 806 } 807 808 void ReplaceFirstSubstringAfterOffset(string16* str, 809 size_t start_offset, 810 StringPiece16 find_this, 811 StringPiece16 replace_with) { 812 DoReplaceSubstringsAfterOffset<string16>( 813 str, start_offset, find_this, replace_with, false); // Replace first. 814 } 815 816 void ReplaceFirstSubstringAfterOffset(std::string* str, 817 size_t start_offset, 818 StringPiece find_this, 819 StringPiece replace_with) { 820 DoReplaceSubstringsAfterOffset<std::string>( 821 str, start_offset, find_this, replace_with, false); // Replace first. 822 } 823 824 void ReplaceSubstringsAfterOffset(string16* str, 825 size_t start_offset, 826 StringPiece16 find_this, 827 StringPiece16 replace_with) { 828 DoReplaceSubstringsAfterOffset<string16>( 829 str, start_offset, find_this, replace_with, true); // Replace all. 830 } 831 832 void ReplaceSubstringsAfterOffset(std::string* str, 833 size_t start_offset, 834 StringPiece find_this, 835 StringPiece replace_with) { 836 DoReplaceSubstringsAfterOffset<std::string>( 837 str, start_offset, find_this, replace_with, true); // Replace all. 838 } 839 840 template <class string_type> 841 inline typename string_type::value_type* WriteIntoT(string_type* str, 842 size_t length_with_null) { 843 DCHECK_GT(length_with_null, 1u); 844 str->reserve(length_with_null); 845 str->resize(length_with_null - 1); 846 return &((*str)[0]); 847 } 848 849 char* WriteInto(std::string* str, size_t length_with_null) { 850 return WriteIntoT(str, length_with_null); 851 } 852 853 char16* WriteInto(string16* str, size_t length_with_null) { 854 return WriteIntoT(str, length_with_null); 855 } 856 857 template<typename STR> 858 static STR JoinStringT(const std::vector<STR>& parts, 859 BasicStringPiece<STR> sep) { 860 if (parts.empty()) 861 return STR(); 862 863 STR result(parts[0]); 864 auto iter = parts.begin(); 865 ++iter; 866 867 for (; iter != parts.end(); ++iter) { 868 sep.AppendToString(&result); 869 result += *iter; 870 } 871 872 return result; 873 } 874 875 std::string JoinString(const std::vector<std::string>& parts, 876 StringPiece separator) { 877 return JoinStringT(parts, separator); 878 } 879 880 string16 JoinString(const std::vector<string16>& parts, 881 StringPiece16 separator) { 882 return JoinStringT(parts, separator); 883 } 884 885 template<class FormatStringType, class OutStringType> 886 OutStringType DoReplaceStringPlaceholders( 887 const FormatStringType& format_string, 888 const std::vector<OutStringType>& subst, 889 std::vector<size_t>* offsets) { 890 size_t substitutions = subst.size(); 891 892 size_t sub_length = 0; 893 for (const auto& cur : subst) 894 sub_length += cur.length(); 895 896 OutStringType formatted; 897 formatted.reserve(format_string.length() + sub_length); 898 899 std::vector<ReplacementOffset> r_offsets; 900 for (auto i = format_string.begin(); i != format_string.end(); ++i) { 901 if ('$' == *i) { 902 if (i + 1 != format_string.end()) { 903 ++i; 904 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 905 if ('$' == *i) { 906 while (i != format_string.end() && '$' == *i) { 907 formatted.push_back('$'); 908 ++i; 909 } 910 --i; 911 } else { 912 uintptr_t index = 0; 913 while (i != format_string.end() && '0' <= *i && *i <= '9') { 914 index *= 10; 915 index += *i - '0'; 916 ++i; 917 } 918 --i; 919 index -= 1; 920 if (offsets) { 921 ReplacementOffset r_offset(index, 922 static_cast<int>(formatted.size())); 923 r_offsets.insert(std::lower_bound(r_offsets.begin(), 924 r_offsets.end(), 925 r_offset, 926 &CompareParameter), 927 r_offset); 928 } 929 if (index < substitutions) 930 formatted.append(subst.at(index)); 931 } 932 } 933 } else { 934 formatted.push_back(*i); 935 } 936 } 937 if (offsets) { 938 for (const auto& cur : r_offsets) 939 offsets->push_back(cur.offset); 940 } 941 return formatted; 942 } 943 944 string16 ReplaceStringPlaceholders(const string16& format_string, 945 const std::vector<string16>& subst, 946 std::vector<size_t>* offsets) { 947 return DoReplaceStringPlaceholders(format_string, subst, offsets); 948 } 949 950 std::string ReplaceStringPlaceholders(const StringPiece& format_string, 951 const std::vector<std::string>& subst, 952 std::vector<size_t>* offsets) { 953 return DoReplaceStringPlaceholders(format_string, subst, offsets); 954 } 955 956 string16 ReplaceStringPlaceholders(const string16& format_string, 957 const string16& a, 958 size_t* offset) { 959 std::vector<size_t> offsets; 960 std::vector<string16> subst; 961 subst.push_back(a); 962 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 963 964 DCHECK_EQ(1U, offsets.size()); 965 if (offset) 966 *offset = offsets[0]; 967 return result; 968 } 969 970 // The following code is compatible with the OpenBSD lcpy interface. See: 971 // http://www.gratisoft.us/todd/papers/strlcpy.html 972 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 973 974 namespace { 975 976 template <typename CHAR> 977 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 978 for (size_t i = 0; i < dst_size; ++i) { 979 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 980 return i; 981 } 982 983 // We were left off at dst_size. We over copied 1 byte. Null terminate. 984 if (dst_size != 0) 985 dst[dst_size - 1] = 0; 986 987 // Count the rest of the |src|, and return it's length in characters. 988 while (src[dst_size]) ++dst_size; 989 return dst_size; 990 } 991 992 } // namespace 993 994 size_t strlcpy(char* dst, const char* src, size_t dst_size) { 995 return lcpyT<char>(dst, src, dst_size); 996 } 997 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 998 return lcpyT<wchar_t>(dst, src, dst_size); 999 } 1000 1001 } // namespace base 1002