1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <math.h> 10 #include <stdarg.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <time.h> 15 #include <wchar.h> 16 #include <wctype.h> 17 18 #include <algorithm> 19 #include <vector> 20 21 #include "base/basictypes.h" 22 #include "base/logging.h" 23 #include "base/memory/singleton.h" 24 #include "base/strings/utf_string_conversion_utils.h" 25 #include "base/strings/utf_string_conversions.h" 26 #include "base/third_party/icu/icu_utf.h" 27 #include "build/build_config.h" 28 29 namespace { 30 31 // Force the singleton used by Empty[W]String[16] to be a unique type. This 32 // prevents other code that might accidentally use Singleton<string> from 33 // getting our internal one. 34 struct EmptyStrings { 35 EmptyStrings() {} 36 const std::string s; 37 const std::wstring ws; 38 const string16 s16; 39 40 static EmptyStrings* GetInstance() { 41 return Singleton<EmptyStrings>::get(); 42 } 43 }; 44 45 // Used by ReplaceStringPlaceholders to track the position in the string of 46 // replaced parameters. 47 struct ReplacementOffset { 48 ReplacementOffset(uintptr_t parameter, size_t offset) 49 : parameter(parameter), 50 offset(offset) {} 51 52 // Index of the parameter. 53 uintptr_t parameter; 54 55 // Starting position in the string. 56 size_t offset; 57 }; 58 59 static bool CompareParameter(const ReplacementOffset& elem1, 60 const ReplacementOffset& elem2) { 61 return elem1.parameter < elem2.parameter; 62 } 63 64 } // namespace 65 66 namespace base { 67 68 bool IsWprintfFormatPortable(const wchar_t* format) { 69 for (const wchar_t* position = format; *position != '\0'; ++position) { 70 if (*position == '%') { 71 bool in_specification = true; 72 bool modifier_l = false; 73 while (in_specification) { 74 // Eat up characters until reaching a known specifier. 75 if (*++position == '\0') { 76 // The format string ended in the middle of a specification. Call 77 // it portable because no unportable specifications were found. The 78 // string is equally broken on all platforms. 79 return true; 80 } 81 82 if (*position == 'l') { 83 // 'l' is the only thing that can save the 's' and 'c' specifiers. 84 modifier_l = true; 85 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 86 *position == 'S' || *position == 'C' || *position == 'F' || 87 *position == 'D' || *position == 'O' || *position == 'U') { 88 // Not portable. 89 return false; 90 } 91 92 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 93 // Portable, keep scanning the rest of the format string. 94 in_specification = false; 95 } 96 } 97 } 98 } 99 100 return true; 101 } 102 103 } // namespace base 104 105 106 const std::string& EmptyString() { 107 return EmptyStrings::GetInstance()->s; 108 } 109 110 const std::wstring& EmptyWString() { 111 return EmptyStrings::GetInstance()->ws; 112 } 113 114 const string16& EmptyString16() { 115 return EmptyStrings::GetInstance()->s16; 116 } 117 118 template<typename STR> 119 bool ReplaceCharsT(const STR& input, 120 const typename STR::value_type replace_chars[], 121 const STR& replace_with, 122 STR* output) { 123 bool removed = false; 124 size_t replace_length = replace_with.length(); 125 126 *output = input; 127 128 size_t found = output->find_first_of(replace_chars); 129 while (found != STR::npos) { 130 removed = true; 131 output->replace(found, 1, replace_with); 132 found = output->find_first_of(replace_chars, found + replace_length); 133 } 134 135 return removed; 136 } 137 138 bool ReplaceChars(const string16& input, 139 const char16 replace_chars[], 140 const string16& replace_with, 141 string16* output) { 142 return ReplaceCharsT(input, replace_chars, replace_with, output); 143 } 144 145 bool ReplaceChars(const std::string& input, 146 const char replace_chars[], 147 const std::string& replace_with, 148 std::string* output) { 149 return ReplaceCharsT(input, replace_chars, replace_with, output); 150 } 151 152 bool RemoveChars(const string16& input, 153 const char16 remove_chars[], 154 string16* output) { 155 return ReplaceChars(input, remove_chars, string16(), output); 156 } 157 158 bool RemoveChars(const std::string& input, 159 const char remove_chars[], 160 std::string* output) { 161 return ReplaceChars(input, remove_chars, std::string(), output); 162 } 163 164 template<typename STR> 165 TrimPositions TrimStringT(const STR& input, 166 const typename STR::value_type trim_chars[], 167 TrimPositions positions, 168 STR* output) { 169 // Find the edges of leading/trailing whitespace as desired. 170 const typename STR::size_type last_char = input.length() - 1; 171 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 172 input.find_first_not_of(trim_chars) : 0; 173 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 174 input.find_last_not_of(trim_chars) : last_char; 175 176 // When the string was all whitespace, report that we stripped off whitespace 177 // from whichever position the caller was interested in. For empty input, we 178 // stripped no whitespace, but we still need to clear |output|. 179 if (input.empty() || 180 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 181 bool input_was_empty = input.empty(); // in case output == &input 182 output->clear(); 183 return input_was_empty ? TRIM_NONE : positions; 184 } 185 186 // Trim the whitespace. 187 *output = 188 input.substr(first_good_char, last_good_char - first_good_char + 1); 189 190 // Return where we trimmed from. 191 return static_cast<TrimPositions>( 192 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 193 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 194 } 195 196 bool TrimString(const std::wstring& input, 197 const wchar_t trim_chars[], 198 std::wstring* output) { 199 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 200 } 201 202 #if !defined(WCHAR_T_IS_UTF16) 203 bool TrimString(const string16& input, 204 const char16 trim_chars[], 205 string16* output) { 206 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 207 } 208 #endif 209 210 bool TrimString(const std::string& input, 211 const char trim_chars[], 212 std::string* output) { 213 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 214 } 215 216 void TruncateUTF8ToByteSize(const std::string& input, 217 const size_t byte_size, 218 std::string* output) { 219 DCHECK(output); 220 if (byte_size > input.length()) { 221 *output = input; 222 return; 223 } 224 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 225 // Note: This cast is necessary because CBU8_NEXT uses int32s. 226 int32 truncation_length = static_cast<int32>(byte_size); 227 int32 char_index = truncation_length - 1; 228 const char* data = input.data(); 229 230 // Using CBU8, we will move backwards from the truncation point 231 // to the beginning of the string looking for a valid UTF8 232 // character. Once a full UTF8 character is found, we will 233 // truncate the string to the end of that character. 234 while (char_index >= 0) { 235 int32 prev = char_index; 236 uint32 code_point = 0; 237 CBU8_NEXT(data, char_index, truncation_length, code_point); 238 if (!base::IsValidCharacter(code_point) || 239 !base::IsValidCodepoint(code_point)) { 240 char_index = prev - 1; 241 } else { 242 break; 243 } 244 } 245 246 if (char_index >= 0 ) 247 *output = input.substr(0, char_index); 248 else 249 output->clear(); 250 } 251 252 TrimPositions TrimWhitespace(const string16& input, 253 TrimPositions positions, 254 string16* output) { 255 return TrimStringT(input, kWhitespaceUTF16, positions, output); 256 } 257 258 TrimPositions TrimWhitespaceASCII(const std::string& input, 259 TrimPositions positions, 260 std::string* output) { 261 return TrimStringT(input, kWhitespaceASCII, positions, output); 262 } 263 264 // This function is only for backward-compatibility. 265 // To be removed when all callers are updated. 266 TrimPositions TrimWhitespace(const std::string& input, 267 TrimPositions positions, 268 std::string* output) { 269 return TrimWhitespaceASCII(input, positions, output); 270 } 271 272 template<typename STR> 273 STR CollapseWhitespaceT(const STR& text, 274 bool trim_sequences_with_line_breaks) { 275 STR result; 276 result.resize(text.size()); 277 278 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 279 // will trim any leading whitespace. 280 bool in_whitespace = true; 281 bool already_trimmed = true; 282 283 int chars_written = 0; 284 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 285 if (IsWhitespace(*i)) { 286 if (!in_whitespace) { 287 // Reduce all whitespace sequences to a single space. 288 in_whitespace = true; 289 result[chars_written++] = L' '; 290 } 291 if (trim_sequences_with_line_breaks && !already_trimmed && 292 ((*i == '\n') || (*i == '\r'))) { 293 // Whitespace sequences containing CR or LF are eliminated entirely. 294 already_trimmed = true; 295 --chars_written; 296 } 297 } else { 298 // Non-whitespace chracters are copied straight across. 299 in_whitespace = false; 300 already_trimmed = false; 301 result[chars_written++] = *i; 302 } 303 } 304 305 if (in_whitespace && !already_trimmed) { 306 // Any trailing whitespace is eliminated. 307 --chars_written; 308 } 309 310 result.resize(chars_written); 311 return result; 312 } 313 314 std::wstring CollapseWhitespace(const std::wstring& text, 315 bool trim_sequences_with_line_breaks) { 316 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 317 } 318 319 #if !defined(WCHAR_T_IS_UTF16) 320 string16 CollapseWhitespace(const string16& text, 321 bool trim_sequences_with_line_breaks) { 322 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 323 } 324 #endif 325 326 std::string CollapseWhitespaceASCII(const std::string& text, 327 bool trim_sequences_with_line_breaks) { 328 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 329 } 330 331 bool ContainsOnlyWhitespaceASCII(const std::string& str) { 332 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { 333 if (!IsAsciiWhitespace(*i)) 334 return false; 335 } 336 return true; 337 } 338 339 bool ContainsOnlyWhitespace(const string16& str) { 340 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos; 341 } 342 343 template<typename STR> 344 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { 345 for (typename STR::const_iterator iter = input.begin(); 346 iter != input.end(); ++iter) { 347 if (characters.find(*iter) == STR::npos) 348 return false; 349 } 350 return true; 351 } 352 353 bool ContainsOnlyChars(const std::wstring& input, 354 const std::wstring& characters) { 355 return ContainsOnlyCharsT(input, characters); 356 } 357 358 #if !defined(WCHAR_T_IS_UTF16) 359 bool ContainsOnlyChars(const string16& input, const string16& characters) { 360 return ContainsOnlyCharsT(input, characters); 361 } 362 #endif 363 364 bool ContainsOnlyChars(const std::string& input, 365 const std::string& characters) { 366 return ContainsOnlyCharsT(input, characters); 367 } 368 369 std::string WideToASCII(const std::wstring& wide) { 370 DCHECK(IsStringASCII(wide)) << wide; 371 return std::string(wide.begin(), wide.end()); 372 } 373 374 std::string UTF16ToASCII(const string16& utf16) { 375 DCHECK(IsStringASCII(utf16)) << utf16; 376 return std::string(utf16.begin(), utf16.end()); 377 } 378 379 // Latin1 is just the low range of Unicode, so we can copy directly to convert. 380 bool WideToLatin1(const std::wstring& wide, std::string* latin1) { 381 std::string output; 382 output.resize(wide.size()); 383 latin1->clear(); 384 for (size_t i = 0; i < wide.size(); i++) { 385 if (wide[i] > 255) 386 return false; 387 output[i] = static_cast<char>(wide[i]); 388 } 389 latin1->swap(output); 390 return true; 391 } 392 393 template<class STR> 394 static bool DoIsStringASCII(const STR& str) { 395 for (size_t i = 0; i < str.length(); i++) { 396 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 397 if (c > 0x7F) 398 return false; 399 } 400 return true; 401 } 402 403 bool IsStringASCII(const std::wstring& str) { 404 return DoIsStringASCII(str); 405 } 406 407 #if !defined(WCHAR_T_IS_UTF16) 408 bool IsStringASCII(const string16& str) { 409 return DoIsStringASCII(str); 410 } 411 #endif 412 413 bool IsStringASCII(const base::StringPiece& str) { 414 return DoIsStringASCII(str); 415 } 416 417 bool IsStringUTF8(const std::string& str) { 418 const char *src = str.data(); 419 int32 src_len = static_cast<int32>(str.length()); 420 int32 char_index = 0; 421 422 while (char_index < src_len) { 423 int32 code_point; 424 CBU8_NEXT(src, char_index, src_len, code_point); 425 if (!base::IsValidCharacter(code_point)) 426 return false; 427 } 428 return true; 429 } 430 431 template<typename Iter> 432 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 433 Iter a_end, 434 const char* b) { 435 for (Iter it = a_begin; it != a_end; ++it, ++b) { 436 if (!*b || base::ToLowerASCII(*it) != *b) 437 return false; 438 } 439 return *b == 0; 440 } 441 442 // Front-ends for LowerCaseEqualsASCII. 443 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 444 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 445 } 446 447 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { 448 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 449 } 450 451 #if !defined(WCHAR_T_IS_UTF16) 452 bool LowerCaseEqualsASCII(const string16& a, const char* b) { 453 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 454 } 455 #endif 456 457 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 458 std::string::const_iterator a_end, 459 const char* b) { 460 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 461 } 462 463 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, 464 std::wstring::const_iterator a_end, 465 const char* b) { 466 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 467 } 468 469 #if !defined(WCHAR_T_IS_UTF16) 470 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 471 string16::const_iterator a_end, 472 const char* b) { 473 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 474 } 475 #endif 476 477 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 478 #if !defined(OS_ANDROID) 479 bool LowerCaseEqualsASCII(const char* a_begin, 480 const char* a_end, 481 const char* b) { 482 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 483 } 484 485 bool LowerCaseEqualsASCII(const wchar_t* a_begin, 486 const wchar_t* a_end, 487 const char* b) { 488 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 489 } 490 491 #if !defined(WCHAR_T_IS_UTF16) 492 bool LowerCaseEqualsASCII(const char16* a_begin, 493 const char16* a_end, 494 const char* b) { 495 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 496 } 497 #endif 498 499 #endif // !defined(OS_ANDROID) 500 501 bool EqualsASCII(const string16& a, const base::StringPiece& b) { 502 if (a.length() != b.length()) 503 return false; 504 return std::equal(b.begin(), b.end(), a.begin()); 505 } 506 507 bool StartsWithASCII(const std::string& str, 508 const std::string& search, 509 bool case_sensitive) { 510 if (case_sensitive) 511 return str.compare(0, search.length(), search) == 0; 512 else 513 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 514 } 515 516 template <typename STR> 517 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 518 if (case_sensitive) { 519 return str.compare(0, search.length(), search) == 0; 520 } else { 521 if (search.size() > str.size()) 522 return false; 523 return std::equal(search.begin(), search.end(), str.begin(), 524 base::CaseInsensitiveCompare<typename STR::value_type>()); 525 } 526 } 527 528 bool StartsWith(const std::wstring& str, const std::wstring& search, 529 bool case_sensitive) { 530 return StartsWithT(str, search, case_sensitive); 531 } 532 533 #if !defined(WCHAR_T_IS_UTF16) 534 bool StartsWith(const string16& str, const string16& search, 535 bool case_sensitive) { 536 return StartsWithT(str, search, case_sensitive); 537 } 538 #endif 539 540 template <typename STR> 541 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 542 typename STR::size_type str_length = str.length(); 543 typename STR::size_type search_length = search.length(); 544 if (search_length > str_length) 545 return false; 546 if (case_sensitive) { 547 return str.compare(str_length - search_length, search_length, search) == 0; 548 } else { 549 return std::equal(search.begin(), search.end(), 550 str.begin() + (str_length - search_length), 551 base::CaseInsensitiveCompare<typename STR::value_type>()); 552 } 553 } 554 555 bool EndsWith(const std::string& str, const std::string& search, 556 bool case_sensitive) { 557 return EndsWithT(str, search, case_sensitive); 558 } 559 560 bool EndsWith(const std::wstring& str, const std::wstring& search, 561 bool case_sensitive) { 562 return EndsWithT(str, search, case_sensitive); 563 } 564 565 #if !defined(WCHAR_T_IS_UTF16) 566 bool EndsWith(const string16& str, const string16& search, 567 bool case_sensitive) { 568 return EndsWithT(str, search, case_sensitive); 569 } 570 #endif 571 572 static const char* const kByteStringsUnlocalized[] = { 573 " B", 574 " kB", 575 " MB", 576 " GB", 577 " TB", 578 " PB" 579 }; 580 581 string16 FormatBytesUnlocalized(int64 bytes) { 582 double unit_amount = static_cast<double>(bytes); 583 size_t dimension = 0; 584 const int kKilo = 1024; 585 while (unit_amount >= kKilo && 586 dimension < arraysize(kByteStringsUnlocalized) - 1) { 587 unit_amount /= kKilo; 588 dimension++; 589 } 590 591 char buf[64]; 592 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 593 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 594 kByteStringsUnlocalized[dimension]); 595 } else { 596 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 597 kByteStringsUnlocalized[dimension]); 598 } 599 600 return ASCIIToUTF16(buf); 601 } 602 603 template<class StringType> 604 void DoReplaceSubstringsAfterOffset(StringType* str, 605 typename StringType::size_type start_offset, 606 const StringType& find_this, 607 const StringType& replace_with, 608 bool replace_all) { 609 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 610 return; 611 612 DCHECK(!find_this.empty()); 613 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 614 offs != StringType::npos; offs = str->find(find_this, offs)) { 615 str->replace(offs, find_this.length(), replace_with); 616 offs += replace_with.length(); 617 618 if (!replace_all) 619 break; 620 } 621 } 622 623 void ReplaceFirstSubstringAfterOffset(string16* str, 624 string16::size_type start_offset, 625 const string16& find_this, 626 const string16& replace_with) { 627 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 628 false); // replace first instance 629 } 630 631 void ReplaceFirstSubstringAfterOffset(std::string* str, 632 std::string::size_type start_offset, 633 const std::string& find_this, 634 const std::string& replace_with) { 635 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 636 false); // replace first instance 637 } 638 639 void ReplaceSubstringsAfterOffset(string16* str, 640 string16::size_type start_offset, 641 const string16& find_this, 642 const string16& replace_with) { 643 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 644 true); // replace all instances 645 } 646 647 void ReplaceSubstringsAfterOffset(std::string* str, 648 std::string::size_type start_offset, 649 const std::string& find_this, 650 const std::string& replace_with) { 651 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 652 true); // replace all instances 653 } 654 655 656 template<typename STR> 657 static size_t TokenizeT(const STR& str, 658 const STR& delimiters, 659 std::vector<STR>* tokens) { 660 tokens->clear(); 661 662 typename STR::size_type start = str.find_first_not_of(delimiters); 663 while (start != STR::npos) { 664 typename STR::size_type end = str.find_first_of(delimiters, start + 1); 665 if (end == STR::npos) { 666 tokens->push_back(str.substr(start)); 667 break; 668 } else { 669 tokens->push_back(str.substr(start, end - start)); 670 start = str.find_first_not_of(delimiters, end + 1); 671 } 672 } 673 674 return tokens->size(); 675 } 676 677 size_t Tokenize(const std::wstring& str, 678 const std::wstring& delimiters, 679 std::vector<std::wstring>* tokens) { 680 return TokenizeT(str, delimiters, tokens); 681 } 682 683 #if !defined(WCHAR_T_IS_UTF16) 684 size_t Tokenize(const string16& str, 685 const string16& delimiters, 686 std::vector<string16>* tokens) { 687 return TokenizeT(str, delimiters, tokens); 688 } 689 #endif 690 691 size_t Tokenize(const std::string& str, 692 const std::string& delimiters, 693 std::vector<std::string>* tokens) { 694 return TokenizeT(str, delimiters, tokens); 695 } 696 697 size_t Tokenize(const base::StringPiece& str, 698 const base::StringPiece& delimiters, 699 std::vector<base::StringPiece>* tokens) { 700 return TokenizeT(str, delimiters, tokens); 701 } 702 703 template<typename STR> 704 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 705 if (parts.empty()) 706 return STR(); 707 708 STR result(parts[0]); 709 typename std::vector<STR>::const_iterator iter = parts.begin(); 710 ++iter; 711 712 for (; iter != parts.end(); ++iter) { 713 result += sep; 714 result += *iter; 715 } 716 717 return result; 718 } 719 720 std::string JoinString(const std::vector<std::string>& parts, char sep) { 721 return JoinStringT(parts, std::string(1, sep)); 722 } 723 724 string16 JoinString(const std::vector<string16>& parts, char16 sep) { 725 return JoinStringT(parts, string16(1, sep)); 726 } 727 728 std::string JoinString(const std::vector<std::string>& parts, 729 const std::string& separator) { 730 return JoinStringT(parts, separator); 731 } 732 733 string16 JoinString(const std::vector<string16>& parts, 734 const string16& separator) { 735 return JoinStringT(parts, separator); 736 } 737 738 template<class FormatStringType, class OutStringType> 739 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 740 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 741 size_t substitutions = subst.size(); 742 743 size_t sub_length = 0; 744 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 745 iter != subst.end(); ++iter) { 746 sub_length += iter->length(); 747 } 748 749 OutStringType formatted; 750 formatted.reserve(format_string.length() + sub_length); 751 752 std::vector<ReplacementOffset> r_offsets; 753 for (typename FormatStringType::const_iterator i = format_string.begin(); 754 i != format_string.end(); ++i) { 755 if ('$' == *i) { 756 if (i + 1 != format_string.end()) { 757 ++i; 758 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 759 if ('$' == *i) { 760 while (i != format_string.end() && '$' == *i) { 761 formatted.push_back('$'); 762 ++i; 763 } 764 --i; 765 } else { 766 uintptr_t index = 0; 767 while (i != format_string.end() && '0' <= *i && *i <= '9') { 768 index *= 10; 769 index += *i - '0'; 770 ++i; 771 } 772 --i; 773 index -= 1; 774 if (offsets) { 775 ReplacementOffset r_offset(index, 776 static_cast<int>(formatted.size())); 777 r_offsets.insert(std::lower_bound(r_offsets.begin(), 778 r_offsets.end(), 779 r_offset, 780 &CompareParameter), 781 r_offset); 782 } 783 if (index < substitutions) 784 formatted.append(subst.at(index)); 785 } 786 } 787 } else { 788 formatted.push_back(*i); 789 } 790 } 791 if (offsets) { 792 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 793 i != r_offsets.end(); ++i) { 794 offsets->push_back(i->offset); 795 } 796 } 797 return formatted; 798 } 799 800 string16 ReplaceStringPlaceholders(const string16& format_string, 801 const std::vector<string16>& subst, 802 std::vector<size_t>* offsets) { 803 return DoReplaceStringPlaceholders(format_string, subst, offsets); 804 } 805 806 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 807 const std::vector<std::string>& subst, 808 std::vector<size_t>* offsets) { 809 return DoReplaceStringPlaceholders(format_string, subst, offsets); 810 } 811 812 string16 ReplaceStringPlaceholders(const string16& format_string, 813 const string16& a, 814 size_t* offset) { 815 std::vector<size_t> offsets; 816 std::vector<string16> subst; 817 subst.push_back(a); 818 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 819 820 DCHECK(offsets.size() == 1); 821 if (offset) { 822 *offset = offsets[0]; 823 } 824 return result; 825 } 826 827 static bool IsWildcard(base_icu::UChar32 character) { 828 return character == '*' || character == '?'; 829 } 830 831 // Move the strings pointers to the point where they start to differ. 832 template <typename CHAR, typename NEXT> 833 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 834 const CHAR** string, const CHAR* string_end, 835 NEXT next) { 836 const CHAR* escape = NULL; 837 while (*pattern != pattern_end && *string != string_end) { 838 if (!escape && IsWildcard(**pattern)) { 839 // We don't want to match wildcard here, except if it's escaped. 840 return; 841 } 842 843 // Check if the escapement char is found. If so, skip it and move to the 844 // next character. 845 if (!escape && **pattern == '\\') { 846 escape = *pattern; 847 next(pattern, pattern_end); 848 continue; 849 } 850 851 // Check if the chars match, if so, increment the ptrs. 852 const CHAR* pattern_next = *pattern; 853 const CHAR* string_next = *string; 854 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 855 if (pattern_char == next(&string_next, string_end) && 856 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 857 *pattern = pattern_next; 858 *string = string_next; 859 } else { 860 // Uh ho, it did not match, we are done. If the last char was an 861 // escapement, that means that it was an error to advance the ptr here, 862 // let's put it back where it was. This also mean that the MatchPattern 863 // function will return false because if we can't match an escape char 864 // here, then no one will. 865 if (escape) { 866 *pattern = escape; 867 } 868 return; 869 } 870 871 escape = NULL; 872 } 873 } 874 875 template <typename CHAR, typename NEXT> 876 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 877 while (*pattern != end) { 878 if (!IsWildcard(**pattern)) 879 return; 880 next(pattern, end); 881 } 882 } 883 884 template <typename CHAR, typename NEXT> 885 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 886 const CHAR* pattern, const CHAR* pattern_end, 887 int depth, 888 NEXT next) { 889 const int kMaxDepth = 16; 890 if (depth > kMaxDepth) 891 return false; 892 893 // Eat all the matching chars. 894 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 895 896 // If the string is empty, then the pattern must be empty too, or contains 897 // only wildcards. 898 if (eval == eval_end) { 899 EatWildcard(&pattern, pattern_end, next); 900 return pattern == pattern_end; 901 } 902 903 // Pattern is empty but not string, this is not a match. 904 if (pattern == pattern_end) 905 return false; 906 907 // If this is a question mark, then we need to compare the rest with 908 // the current string or the string with one character eaten. 909 const CHAR* next_pattern = pattern; 910 next(&next_pattern, pattern_end); 911 if (pattern[0] == '?') { 912 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 913 depth + 1, next)) 914 return true; 915 const CHAR* next_eval = eval; 916 next(&next_eval, eval_end); 917 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 918 depth + 1, next)) 919 return true; 920 } 921 922 // This is a *, try to match all the possible substrings with the remainder 923 // of the pattern. 924 if (pattern[0] == '*') { 925 // Collapse duplicate wild cards (********** into *) so that the 926 // method does not recurse unnecessarily. http://crbug.com/52839 927 EatWildcard(&next_pattern, pattern_end, next); 928 929 while (eval != eval_end) { 930 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 931 depth + 1, next)) 932 return true; 933 eval++; 934 } 935 936 // We reached the end of the string, let see if the pattern contains only 937 // wildcards. 938 if (eval == eval_end) { 939 EatWildcard(&pattern, pattern_end, next); 940 if (pattern != pattern_end) 941 return false; 942 return true; 943 } 944 } 945 946 return false; 947 } 948 949 struct NextCharUTF8 { 950 base_icu::UChar32 operator()(const char** p, const char* end) { 951 base_icu::UChar32 c; 952 int offset = 0; 953 CBU8_NEXT(*p, offset, end - *p, c); 954 *p += offset; 955 return c; 956 } 957 }; 958 959 struct NextCharUTF16 { 960 base_icu::UChar32 operator()(const char16** p, const char16* end) { 961 base_icu::UChar32 c; 962 int offset = 0; 963 CBU16_NEXT(*p, offset, end - *p, c); 964 *p += offset; 965 return c; 966 } 967 }; 968 969 bool MatchPattern(const base::StringPiece& eval, 970 const base::StringPiece& pattern) { 971 return MatchPatternT(eval.data(), eval.data() + eval.size(), 972 pattern.data(), pattern.data() + pattern.size(), 973 0, NextCharUTF8()); 974 } 975 976 bool MatchPattern(const string16& eval, const string16& pattern) { 977 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 978 pattern.c_str(), pattern.c_str() + pattern.size(), 979 0, NextCharUTF16()); 980 } 981 982 // The following code is compatible with the OpenBSD lcpy interface. See: 983 // http://www.gratisoft.us/todd/papers/strlcpy.html 984 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 985 986 namespace { 987 988 template <typename CHAR> 989 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 990 for (size_t i = 0; i < dst_size; ++i) { 991 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 992 return i; 993 } 994 995 // We were left off at dst_size. We over copied 1 byte. Null terminate. 996 if (dst_size != 0) 997 dst[dst_size - 1] = 0; 998 999 // Count the rest of the |src|, and return it's length in characters. 1000 while (src[dst_size]) ++dst_size; 1001 return dst_size; 1002 } 1003 1004 } // namespace 1005 1006 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 1007 return lcpyT<char>(dst, src, dst_size); 1008 } 1009 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 1010 return lcpyT<wchar_t>(dst, src, dst_size); 1011 } 1012