1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <math.h> 10 #include <stdarg.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <time.h> 15 #include <wchar.h> 16 #include <wctype.h> 17 18 #include <algorithm> 19 #include <vector> 20 21 #include "base/basictypes.h" 22 #include "base/logging.h" 23 #include "base/memory/singleton.h" 24 #include "base/strings/utf_string_conversion_utils.h" 25 #include "base/strings/utf_string_conversions.h" 26 #include "base/third_party/icu/icu_utf.h" 27 #include "build/build_config.h" 28 29 // Remove when this entire file is in the base namespace. 30 using base::char16; 31 using base::string16; 32 33 namespace { 34 35 // Force the singleton used by EmptyString[16] to be a unique type. This 36 // prevents other code that might accidentally use Singleton<string> from 37 // getting our internal one. 38 struct EmptyStrings { 39 EmptyStrings() {} 40 const std::string s; 41 const string16 s16; 42 43 static EmptyStrings* GetInstance() { 44 return Singleton<EmptyStrings>::get(); 45 } 46 }; 47 48 // Used by ReplaceStringPlaceholders to track the position in the string of 49 // replaced parameters. 50 struct ReplacementOffset { 51 ReplacementOffset(uintptr_t parameter, size_t offset) 52 : parameter(parameter), 53 offset(offset) {} 54 55 // Index of the parameter. 56 uintptr_t parameter; 57 58 // Starting position in the string. 59 size_t offset; 60 }; 61 62 static bool CompareParameter(const ReplacementOffset& elem1, 63 const ReplacementOffset& elem2) { 64 return elem1.parameter < elem2.parameter; 65 } 66 67 } // namespace 68 69 namespace base { 70 71 bool IsWprintfFormatPortable(const wchar_t* format) { 72 for (const wchar_t* position = format; *position != '\0'; ++position) { 73 if (*position == '%') { 74 bool in_specification = true; 75 bool modifier_l = false; 76 while (in_specification) { 77 // Eat up characters until reaching a known specifier. 78 if (*++position == '\0') { 79 // The format string ended in the middle of a specification. Call 80 // it portable because no unportable specifications were found. The 81 // string is equally broken on all platforms. 82 return true; 83 } 84 85 if (*position == 'l') { 86 // 'l' is the only thing that can save the 's' and 'c' specifiers. 87 modifier_l = true; 88 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 89 *position == 'S' || *position == 'C' || *position == 'F' || 90 *position == 'D' || *position == 'O' || *position == 'U') { 91 // Not portable. 92 return false; 93 } 94 95 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 96 // Portable, keep scanning the rest of the format string. 97 in_specification = false; 98 } 99 } 100 } 101 } 102 103 return true; 104 } 105 106 const std::string& EmptyString() { 107 return EmptyStrings::GetInstance()->s; 108 } 109 110 const string16& EmptyString16() { 111 return EmptyStrings::GetInstance()->s16; 112 } 113 114 template<typename STR> 115 bool ReplaceCharsT(const STR& input, 116 const STR& replace_chars, 117 const STR& replace_with, 118 STR* output) { 119 bool removed = false; 120 size_t replace_length = replace_with.length(); 121 122 *output = input; 123 124 size_t found = output->find_first_of(replace_chars); 125 while (found != STR::npos) { 126 removed = true; 127 output->replace(found, 1, replace_with); 128 found = output->find_first_of(replace_chars, found + replace_length); 129 } 130 131 return removed; 132 } 133 134 bool ReplaceChars(const string16& input, 135 const base::StringPiece16& replace_chars, 136 const string16& replace_with, 137 string16* output) { 138 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 139 } 140 141 bool ReplaceChars(const std::string& input, 142 const base::StringPiece& replace_chars, 143 const std::string& replace_with, 144 std::string* output) { 145 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 146 } 147 148 bool RemoveChars(const string16& input, 149 const base::StringPiece16& remove_chars, 150 string16* output) { 151 return ReplaceChars(input, remove_chars.as_string(), string16(), output); 152 } 153 154 bool RemoveChars(const std::string& input, 155 const base::StringPiece& remove_chars, 156 std::string* output) { 157 return ReplaceChars(input, remove_chars.as_string(), std::string(), output); 158 } 159 160 template<typename STR> 161 TrimPositions TrimStringT(const STR& input, 162 const STR& trim_chars, 163 TrimPositions positions, 164 STR* output) { 165 // Find the edges of leading/trailing whitespace as desired. 166 const size_t last_char = input.length() - 1; 167 const size_t first_good_char = (positions & TRIM_LEADING) ? 168 input.find_first_not_of(trim_chars) : 0; 169 const size_t last_good_char = (positions & TRIM_TRAILING) ? 170 input.find_last_not_of(trim_chars) : last_char; 171 172 // When the string was all whitespace, report that we stripped off whitespace 173 // from whichever position the caller was interested in. For empty input, we 174 // stripped no whitespace, but we still need to clear |output|. 175 if (input.empty() || 176 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 177 bool input_was_empty = input.empty(); // in case output == &input 178 output->clear(); 179 return input_was_empty ? TRIM_NONE : positions; 180 } 181 182 // Trim the whitespace. 183 *output = 184 input.substr(first_good_char, last_good_char - first_good_char + 1); 185 186 // Return where we trimmed from. 187 return static_cast<TrimPositions>( 188 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 189 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 190 } 191 192 bool TrimString(const string16& input, 193 const base::StringPiece16& trim_chars, 194 string16* output) { 195 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) != 196 TRIM_NONE; 197 } 198 199 bool TrimString(const std::string& input, 200 const base::StringPiece& trim_chars, 201 std::string* output) { 202 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) != 203 TRIM_NONE; 204 } 205 206 void TruncateUTF8ToByteSize(const std::string& input, 207 const size_t byte_size, 208 std::string* output) { 209 DCHECK(output); 210 if (byte_size > input.length()) { 211 *output = input; 212 return; 213 } 214 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 215 // Note: This cast is necessary because CBU8_NEXT uses int32s. 216 int32 truncation_length = static_cast<int32>(byte_size); 217 int32 char_index = truncation_length - 1; 218 const char* data = input.data(); 219 220 // Using CBU8, we will move backwards from the truncation point 221 // to the beginning of the string looking for a valid UTF8 222 // character. Once a full UTF8 character is found, we will 223 // truncate the string to the end of that character. 224 while (char_index >= 0) { 225 int32 prev = char_index; 226 uint32 code_point = 0; 227 CBU8_NEXT(data, char_index, truncation_length, code_point); 228 if (!IsValidCharacter(code_point) || 229 !IsValidCodepoint(code_point)) { 230 char_index = prev - 1; 231 } else { 232 break; 233 } 234 } 235 236 if (char_index >= 0 ) 237 *output = input.substr(0, char_index); 238 else 239 output->clear(); 240 } 241 242 TrimPositions TrimWhitespace(const string16& input, 243 TrimPositions positions, 244 string16* output) { 245 return TrimStringT(input, base::string16(kWhitespaceUTF16), positions, 246 output); 247 } 248 249 TrimPositions TrimWhitespaceASCII(const std::string& input, 250 TrimPositions positions, 251 std::string* output) { 252 return TrimStringT(input, std::string(kWhitespaceASCII), positions, output); 253 } 254 255 // This function is only for backward-compatibility. 256 // To be removed when all callers are updated. 257 TrimPositions TrimWhitespace(const std::string& input, 258 TrimPositions positions, 259 std::string* output) { 260 return TrimWhitespaceASCII(input, positions, output); 261 } 262 263 template<typename STR> 264 STR CollapseWhitespaceT(const STR& text, 265 bool trim_sequences_with_line_breaks) { 266 STR result; 267 result.resize(text.size()); 268 269 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 270 // will trim any leading whitespace. 271 bool in_whitespace = true; 272 bool already_trimmed = true; 273 274 int chars_written = 0; 275 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 276 if (IsWhitespace(*i)) { 277 if (!in_whitespace) { 278 // Reduce all whitespace sequences to a single space. 279 in_whitespace = true; 280 result[chars_written++] = L' '; 281 } 282 if (trim_sequences_with_line_breaks && !already_trimmed && 283 ((*i == '\n') || (*i == '\r'))) { 284 // Whitespace sequences containing CR or LF are eliminated entirely. 285 already_trimmed = true; 286 --chars_written; 287 } 288 } else { 289 // Non-whitespace chracters are copied straight across. 290 in_whitespace = false; 291 already_trimmed = false; 292 result[chars_written++] = *i; 293 } 294 } 295 296 if (in_whitespace && !already_trimmed) { 297 // Any trailing whitespace is eliminated. 298 --chars_written; 299 } 300 301 result.resize(chars_written); 302 return result; 303 } 304 305 string16 CollapseWhitespace(const string16& text, 306 bool trim_sequences_with_line_breaks) { 307 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 308 } 309 310 std::string CollapseWhitespaceASCII(const std::string& text, 311 bool trim_sequences_with_line_breaks) { 312 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 313 } 314 315 bool ContainsOnlyChars(const StringPiece& input, 316 const StringPiece& characters) { 317 return input.find_first_not_of(characters) == StringPiece::npos; 318 } 319 320 bool ContainsOnlyChars(const StringPiece16& input, 321 const StringPiece16& characters) { 322 return input.find_first_not_of(characters) == StringPiece16::npos; 323 } 324 325 template<class STR> 326 static bool DoIsStringASCII(const STR& str) { 327 for (size_t i = 0; i < str.length(); i++) { 328 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 329 if (c > 0x7F) 330 return false; 331 } 332 return true; 333 } 334 335 bool IsStringASCII(const StringPiece& str) { 336 return DoIsStringASCII(str); 337 } 338 339 bool IsStringASCII(const string16& str) { 340 return DoIsStringASCII(str); 341 } 342 343 bool IsStringUTF8(const std::string& str) { 344 const char *src = str.data(); 345 int32 src_len = static_cast<int32>(str.length()); 346 int32 char_index = 0; 347 348 while (char_index < src_len) { 349 int32 code_point; 350 CBU8_NEXT(src, char_index, src_len, code_point); 351 if (!IsValidCharacter(code_point)) 352 return false; 353 } 354 return true; 355 } 356 357 } // namespace base 358 359 template<typename Iter> 360 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 361 Iter a_end, 362 const char* b) { 363 for (Iter it = a_begin; it != a_end; ++it, ++b) { 364 if (!*b || base::ToLowerASCII(*it) != *b) 365 return false; 366 } 367 return *b == 0; 368 } 369 370 // Front-ends for LowerCaseEqualsASCII. 371 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 372 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 373 } 374 375 bool LowerCaseEqualsASCII(const string16& a, const char* b) { 376 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 377 } 378 379 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 380 std::string::const_iterator a_end, 381 const char* b) { 382 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 383 } 384 385 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 386 string16::const_iterator a_end, 387 const char* b) { 388 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 389 } 390 391 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 392 #if !defined(OS_ANDROID) 393 bool LowerCaseEqualsASCII(const char* a_begin, 394 const char* a_end, 395 const char* b) { 396 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 397 } 398 399 bool LowerCaseEqualsASCII(const char16* a_begin, 400 const char16* a_end, 401 const char* b) { 402 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 403 } 404 405 #endif // !defined(OS_ANDROID) 406 407 bool EqualsASCII(const string16& a, const base::StringPiece& b) { 408 if (a.length() != b.length()) 409 return false; 410 return std::equal(b.begin(), b.end(), a.begin()); 411 } 412 413 bool StartsWithASCII(const std::string& str, 414 const std::string& search, 415 bool case_sensitive) { 416 if (case_sensitive) 417 return str.compare(0, search.length(), search) == 0; 418 else 419 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 420 } 421 422 template <typename STR> 423 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 424 if (case_sensitive) { 425 return str.compare(0, search.length(), search) == 0; 426 } else { 427 if (search.size() > str.size()) 428 return false; 429 return std::equal(search.begin(), search.end(), str.begin(), 430 base::CaseInsensitiveCompare<typename STR::value_type>()); 431 } 432 } 433 434 bool StartsWith(const string16& str, const string16& search, 435 bool case_sensitive) { 436 return StartsWithT(str, search, case_sensitive); 437 } 438 439 template <typename STR> 440 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 441 size_t str_length = str.length(); 442 size_t search_length = search.length(); 443 if (search_length > str_length) 444 return false; 445 if (case_sensitive) 446 return str.compare(str_length - search_length, search_length, search) == 0; 447 return std::equal(search.begin(), search.end(), 448 str.begin() + (str_length - search_length), 449 base::CaseInsensitiveCompare<typename STR::value_type>()); 450 } 451 452 bool EndsWith(const std::string& str, const std::string& search, 453 bool case_sensitive) { 454 return EndsWithT(str, search, case_sensitive); 455 } 456 457 bool EndsWith(const string16& str, const string16& search, 458 bool case_sensitive) { 459 return EndsWithT(str, search, case_sensitive); 460 } 461 462 static const char* const kByteStringsUnlocalized[] = { 463 " B", 464 " kB", 465 " MB", 466 " GB", 467 " TB", 468 " PB" 469 }; 470 471 string16 FormatBytesUnlocalized(int64 bytes) { 472 double unit_amount = static_cast<double>(bytes); 473 size_t dimension = 0; 474 const int kKilo = 1024; 475 while (unit_amount >= kKilo && 476 dimension < arraysize(kByteStringsUnlocalized) - 1) { 477 unit_amount /= kKilo; 478 dimension++; 479 } 480 481 char buf[64]; 482 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 483 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 484 kByteStringsUnlocalized[dimension]); 485 } else { 486 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 487 kByteStringsUnlocalized[dimension]); 488 } 489 490 return base::ASCIIToUTF16(buf); 491 } 492 493 template<class StringType> 494 void DoReplaceSubstringsAfterOffset(StringType* str, 495 size_t start_offset, 496 const StringType& find_this, 497 const StringType& replace_with, 498 bool replace_all) { 499 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 500 return; 501 502 DCHECK(!find_this.empty()); 503 for (size_t offs(str->find(find_this, start_offset)); 504 offs != StringType::npos; offs = str->find(find_this, offs)) { 505 str->replace(offs, find_this.length(), replace_with); 506 offs += replace_with.length(); 507 508 if (!replace_all) 509 break; 510 } 511 } 512 513 void ReplaceFirstSubstringAfterOffset(string16* str, 514 size_t start_offset, 515 const string16& find_this, 516 const string16& replace_with) { 517 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 518 false); // replace first instance 519 } 520 521 void ReplaceFirstSubstringAfterOffset(std::string* str, 522 size_t start_offset, 523 const std::string& find_this, 524 const std::string& replace_with) { 525 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 526 false); // replace first instance 527 } 528 529 void ReplaceSubstringsAfterOffset(string16* str, 530 size_t start_offset, 531 const string16& find_this, 532 const string16& replace_with) { 533 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 534 true); // replace all instances 535 } 536 537 void ReplaceSubstringsAfterOffset(std::string* str, 538 size_t start_offset, 539 const std::string& find_this, 540 const std::string& replace_with) { 541 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 542 true); // replace all instances 543 } 544 545 546 template<typename STR> 547 static size_t TokenizeT(const STR& str, 548 const STR& delimiters, 549 std::vector<STR>* tokens) { 550 tokens->clear(); 551 552 size_t start = str.find_first_not_of(delimiters); 553 while (start != STR::npos) { 554 size_t end = str.find_first_of(delimiters, start + 1); 555 if (end == STR::npos) { 556 tokens->push_back(str.substr(start)); 557 break; 558 } else { 559 tokens->push_back(str.substr(start, end - start)); 560 start = str.find_first_not_of(delimiters, end + 1); 561 } 562 } 563 564 return tokens->size(); 565 } 566 567 size_t Tokenize(const string16& str, 568 const string16& delimiters, 569 std::vector<string16>* tokens) { 570 return TokenizeT(str, delimiters, tokens); 571 } 572 573 size_t Tokenize(const std::string& str, 574 const std::string& delimiters, 575 std::vector<std::string>* tokens) { 576 return TokenizeT(str, delimiters, tokens); 577 } 578 579 size_t Tokenize(const base::StringPiece& str, 580 const base::StringPiece& delimiters, 581 std::vector<base::StringPiece>* tokens) { 582 return TokenizeT(str, delimiters, tokens); 583 } 584 585 template<typename STR> 586 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 587 if (parts.empty()) 588 return STR(); 589 590 STR result(parts[0]); 591 typename std::vector<STR>::const_iterator iter = parts.begin(); 592 ++iter; 593 594 for (; iter != parts.end(); ++iter) { 595 result += sep; 596 result += *iter; 597 } 598 599 return result; 600 } 601 602 std::string JoinString(const std::vector<std::string>& parts, char sep) { 603 return JoinStringT(parts, std::string(1, sep)); 604 } 605 606 string16 JoinString(const std::vector<string16>& parts, char16 sep) { 607 return JoinStringT(parts, string16(1, sep)); 608 } 609 610 std::string JoinString(const std::vector<std::string>& parts, 611 const std::string& separator) { 612 return JoinStringT(parts, separator); 613 } 614 615 string16 JoinString(const std::vector<string16>& parts, 616 const string16& separator) { 617 return JoinStringT(parts, separator); 618 } 619 620 template<class FormatStringType, class OutStringType> 621 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 622 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 623 size_t substitutions = subst.size(); 624 625 size_t sub_length = 0; 626 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 627 iter != subst.end(); ++iter) { 628 sub_length += iter->length(); 629 } 630 631 OutStringType formatted; 632 formatted.reserve(format_string.length() + sub_length); 633 634 std::vector<ReplacementOffset> r_offsets; 635 for (typename FormatStringType::const_iterator i = format_string.begin(); 636 i != format_string.end(); ++i) { 637 if ('$' == *i) { 638 if (i + 1 != format_string.end()) { 639 ++i; 640 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 641 if ('$' == *i) { 642 while (i != format_string.end() && '$' == *i) { 643 formatted.push_back('$'); 644 ++i; 645 } 646 --i; 647 } else { 648 uintptr_t index = 0; 649 while (i != format_string.end() && '0' <= *i && *i <= '9') { 650 index *= 10; 651 index += *i - '0'; 652 ++i; 653 } 654 --i; 655 index -= 1; 656 if (offsets) { 657 ReplacementOffset r_offset(index, 658 static_cast<int>(formatted.size())); 659 r_offsets.insert(std::lower_bound(r_offsets.begin(), 660 r_offsets.end(), 661 r_offset, 662 &CompareParameter), 663 r_offset); 664 } 665 if (index < substitutions) 666 formatted.append(subst.at(index)); 667 } 668 } 669 } else { 670 formatted.push_back(*i); 671 } 672 } 673 if (offsets) { 674 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 675 i != r_offsets.end(); ++i) { 676 offsets->push_back(i->offset); 677 } 678 } 679 return formatted; 680 } 681 682 string16 ReplaceStringPlaceholders(const string16& format_string, 683 const std::vector<string16>& subst, 684 std::vector<size_t>* offsets) { 685 return DoReplaceStringPlaceholders(format_string, subst, offsets); 686 } 687 688 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 689 const std::vector<std::string>& subst, 690 std::vector<size_t>* offsets) { 691 return DoReplaceStringPlaceholders(format_string, subst, offsets); 692 } 693 694 string16 ReplaceStringPlaceholders(const string16& format_string, 695 const string16& a, 696 size_t* offset) { 697 std::vector<size_t> offsets; 698 std::vector<string16> subst; 699 subst.push_back(a); 700 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 701 702 DCHECK_EQ(1U, offsets.size()); 703 if (offset) 704 *offset = offsets[0]; 705 return result; 706 } 707 708 static bool IsWildcard(base_icu::UChar32 character) { 709 return character == '*' || character == '?'; 710 } 711 712 // Move the strings pointers to the point where they start to differ. 713 template <typename CHAR, typename NEXT> 714 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 715 const CHAR** string, const CHAR* string_end, 716 NEXT next) { 717 const CHAR* escape = NULL; 718 while (*pattern != pattern_end && *string != string_end) { 719 if (!escape && IsWildcard(**pattern)) { 720 // We don't want to match wildcard here, except if it's escaped. 721 return; 722 } 723 724 // Check if the escapement char is found. If so, skip it and move to the 725 // next character. 726 if (!escape && **pattern == '\\') { 727 escape = *pattern; 728 next(pattern, pattern_end); 729 continue; 730 } 731 732 // Check if the chars match, if so, increment the ptrs. 733 const CHAR* pattern_next = *pattern; 734 const CHAR* string_next = *string; 735 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 736 if (pattern_char == next(&string_next, string_end) && 737 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 738 *pattern = pattern_next; 739 *string = string_next; 740 } else { 741 // Uh ho, it did not match, we are done. If the last char was an 742 // escapement, that means that it was an error to advance the ptr here, 743 // let's put it back where it was. This also mean that the MatchPattern 744 // function will return false because if we can't match an escape char 745 // here, then no one will. 746 if (escape) { 747 *pattern = escape; 748 } 749 return; 750 } 751 752 escape = NULL; 753 } 754 } 755 756 template <typename CHAR, typename NEXT> 757 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 758 while (*pattern != end) { 759 if (!IsWildcard(**pattern)) 760 return; 761 next(pattern, end); 762 } 763 } 764 765 template <typename CHAR, typename NEXT> 766 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 767 const CHAR* pattern, const CHAR* pattern_end, 768 int depth, 769 NEXT next) { 770 const int kMaxDepth = 16; 771 if (depth > kMaxDepth) 772 return false; 773 774 // Eat all the matching chars. 775 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 776 777 // If the string is empty, then the pattern must be empty too, or contains 778 // only wildcards. 779 if (eval == eval_end) { 780 EatWildcard(&pattern, pattern_end, next); 781 return pattern == pattern_end; 782 } 783 784 // Pattern is empty but not string, this is not a match. 785 if (pattern == pattern_end) 786 return false; 787 788 // If this is a question mark, then we need to compare the rest with 789 // the current string or the string with one character eaten. 790 const CHAR* next_pattern = pattern; 791 next(&next_pattern, pattern_end); 792 if (pattern[0] == '?') { 793 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 794 depth + 1, next)) 795 return true; 796 const CHAR* next_eval = eval; 797 next(&next_eval, eval_end); 798 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 799 depth + 1, next)) 800 return true; 801 } 802 803 // This is a *, try to match all the possible substrings with the remainder 804 // of the pattern. 805 if (pattern[0] == '*') { 806 // Collapse duplicate wild cards (********** into *) so that the 807 // method does not recurse unnecessarily. http://crbug.com/52839 808 EatWildcard(&next_pattern, pattern_end, next); 809 810 while (eval != eval_end) { 811 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 812 depth + 1, next)) 813 return true; 814 eval++; 815 } 816 817 // We reached the end of the string, let see if the pattern contains only 818 // wildcards. 819 if (eval == eval_end) { 820 EatWildcard(&pattern, pattern_end, next); 821 if (pattern != pattern_end) 822 return false; 823 return true; 824 } 825 } 826 827 return false; 828 } 829 830 struct NextCharUTF8 { 831 base_icu::UChar32 operator()(const char** p, const char* end) { 832 base_icu::UChar32 c; 833 int offset = 0; 834 CBU8_NEXT(*p, offset, end - *p, c); 835 *p += offset; 836 return c; 837 } 838 }; 839 840 struct NextCharUTF16 { 841 base_icu::UChar32 operator()(const char16** p, const char16* end) { 842 base_icu::UChar32 c; 843 int offset = 0; 844 CBU16_NEXT(*p, offset, end - *p, c); 845 *p += offset; 846 return c; 847 } 848 }; 849 850 bool MatchPattern(const base::StringPiece& eval, 851 const base::StringPiece& pattern) { 852 return MatchPatternT(eval.data(), eval.data() + eval.size(), 853 pattern.data(), pattern.data() + pattern.size(), 854 0, NextCharUTF8()); 855 } 856 857 bool MatchPattern(const string16& eval, const string16& pattern) { 858 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 859 pattern.c_str(), pattern.c_str() + pattern.size(), 860 0, NextCharUTF16()); 861 } 862 863 // The following code is compatible with the OpenBSD lcpy interface. See: 864 // http://www.gratisoft.us/todd/papers/strlcpy.html 865 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 866 867 namespace { 868 869 template <typename CHAR> 870 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 871 for (size_t i = 0; i < dst_size; ++i) { 872 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 873 return i; 874 } 875 876 // We were left off at dst_size. We over copied 1 byte. Null terminate. 877 if (dst_size != 0) 878 dst[dst_size - 1] = 0; 879 880 // Count the rest of the |src|, and return it's length in characters. 881 while (src[dst_size]) ++dst_size; 882 return dst_size; 883 } 884 885 } // namespace 886 887 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 888 return lcpyT<char>(dst, src, dst_size); 889 } 890 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 891 return lcpyT<wchar_t>(dst, src, dst_size); 892 } 893