1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/string_util.h" 6 7 #include "build/build_config.h" 8 9 #include <ctype.h> 10 #include <errno.h> 11 #include <math.h> 12 #include <stdarg.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <string.h> 16 #include <time.h> 17 #include <wchar.h> 18 #include <wctype.h> 19 20 #include <algorithm> 21 #include <vector> 22 23 #include "base/basictypes.h" 24 #include "base/logging.h" 25 #include "base/singleton.h" 26 #include "base/third_party/dmg_fp/dmg_fp.h" 27 28 namespace { 29 30 // Force the singleton used by Empty[W]String[16] to be a unique type. This 31 // prevents other code that might accidentally use Singleton<string> from 32 // getting our internal one. 33 struct EmptyStrings { 34 EmptyStrings() {} 35 const std::string s; 36 const std::wstring ws; 37 const string16 s16; 38 }; 39 40 // Used by ReplaceStringPlaceholders to track the position in the string of 41 // replaced parameters. 42 struct ReplacementOffset { 43 ReplacementOffset(uintptr_t parameter, size_t offset) 44 : parameter(parameter), 45 offset(offset) {} 46 47 // Index of the parameter. 48 uintptr_t parameter; 49 50 // Starting position in the string. 51 size_t offset; 52 }; 53 54 static bool CompareParameter(const ReplacementOffset& elem1, 55 const ReplacementOffset& elem2) { 56 return elem1.parameter < elem2.parameter; 57 } 58 59 // Generalized string-to-number conversion. 60 // 61 // StringToNumberTraits should provide: 62 // - a typedef for string_type, the STL string type used as input. 63 // - a typedef for value_type, the target numeric type. 64 // - a static function, convert_func, which dispatches to an appropriate 65 // strtol-like function and returns type value_type. 
66 // - a static function, valid_func, which validates |input| and returns a bool 67 // indicating whether it is in proper form. This is used to check for 68 // conditions that convert_func tolerates but should result in 69 // StringToNumber returning false. For strtol-like funtions, valid_func 70 // should check for leading whitespace. 71 template<typename StringToNumberTraits> 72 bool StringToNumber(const typename StringToNumberTraits::string_type& input, 73 typename StringToNumberTraits::value_type* output) { 74 typedef StringToNumberTraits traits; 75 76 errno = 0; // Thread-safe? It is on at least Mac, Linux, and Windows. 77 typename traits::string_type::value_type* endptr = NULL; 78 typename traits::value_type value = traits::convert_func(input.c_str(), 79 &endptr); 80 *output = value; 81 82 // Cases to return false: 83 // - If errno is ERANGE, there was an overflow or underflow. 84 // - If the input string is empty, there was nothing to parse. 85 // - If endptr does not point to the end of the string, there are either 86 // characters remaining in the string after a parsed number, or the string 87 // does not begin with a parseable number. endptr is compared to the 88 // expected end given the string's stated length to correctly catch cases 89 // where the string contains embedded NUL characters. 90 // - valid_func determines that the input is not in preferred form. 91 return errno == 0 && 92 !input.empty() && 93 input.c_str() + input.length() == endptr && 94 traits::valid_func(input); 95 } 96 97 static int strtoi(const char *nptr, char **endptr, int base) { 98 long res = strtol(nptr, endptr, base); 99 #if __LP64__ 100 // Long is 64-bits, we have to handle under/overflow ourselves. 
101 if (res > kint32max) { 102 res = kint32max; 103 errno = ERANGE; 104 } else if (res < kint32min) { 105 res = kint32min; 106 errno = ERANGE; 107 } 108 #endif 109 return static_cast<int>(res); 110 } 111 112 static unsigned int strtoui(const char *nptr, char **endptr, int base) { 113 unsigned long res = strtoul(nptr, endptr, base); 114 #if __LP64__ 115 // Long is 64-bits, we have to handle under/overflow ourselves. Test to see 116 // if the result can fit into 32-bits (as signed or unsigned). 117 if (static_cast<int>(static_cast<long>(res)) != static_cast<long>(res) && 118 static_cast<unsigned int>(res) != res) { 119 res = kuint32max; 120 errno = ERANGE; 121 } 122 #endif 123 return static_cast<unsigned int>(res); 124 } 125 126 class StringToIntTraits { 127 public: 128 typedef std::string string_type; 129 typedef int value_type; 130 static const int kBase = 10; 131 static inline value_type convert_func(const string_type::value_type* str, 132 string_type::value_type** endptr) { 133 return strtoi(str, endptr, kBase); 134 } 135 static inline bool valid_func(const string_type& str) { 136 return !str.empty() && !isspace(str[0]); 137 } 138 }; 139 140 class String16ToIntTraits { 141 public: 142 typedef string16 string_type; 143 typedef int value_type; 144 static const int kBase = 10; 145 static inline value_type convert_func(const string_type::value_type* str, 146 string_type::value_type** endptr) { 147 #if defined(WCHAR_T_IS_UTF16) 148 return wcstol(str, endptr, kBase); 149 #elif defined(WCHAR_T_IS_UTF32) 150 std::string ascii_string = UTF16ToASCII(string16(str)); 151 char* ascii_end = NULL; 152 value_type ret = strtoi(ascii_string.c_str(), &ascii_end, kBase); 153 if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 154 *endptr = 155 const_cast<string_type::value_type*>(str) + ascii_string.length(); 156 } 157 return ret; 158 #endif 159 } 160 static inline bool valid_func(const string_type& str) { 161 return !str.empty() && !iswspace(str[0]); 162 } 163 }; 164 
165 class StringToInt64Traits { 166 public: 167 typedef std::string string_type; 168 typedef int64 value_type; 169 static const int kBase = 10; 170 static inline value_type convert_func(const string_type::value_type* str, 171 string_type::value_type** endptr) { 172 #ifdef OS_WIN 173 return _strtoi64(str, endptr, kBase); 174 #else // assume OS_POSIX 175 return strtoll(str, endptr, kBase); 176 #endif 177 } 178 static inline bool valid_func(const string_type& str) { 179 return !str.empty() && !isspace(str[0]); 180 } 181 }; 182 183 class String16ToInt64Traits { 184 public: 185 typedef string16 string_type; 186 typedef int64 value_type; 187 static const int kBase = 10; 188 static inline value_type convert_func(const string_type::value_type* str, 189 string_type::value_type** endptr) { 190 #ifdef OS_WIN 191 return _wcstoi64(str, endptr, kBase); 192 #else // assume OS_POSIX 193 std::string ascii_string = UTF16ToASCII(string16(str)); 194 char* ascii_end = NULL; 195 value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase); 196 if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 197 *endptr = 198 const_cast<string_type::value_type*>(str) + ascii_string.length(); 199 } 200 return ret; 201 #endif 202 } 203 static inline bool valid_func(const string_type& str) { 204 return !str.empty() && !iswspace(str[0]); 205 } 206 }; 207 208 // For the HexString variants, use the unsigned variants like strtoul for 209 // convert_func so that input like "0x80000000" doesn't result in an overflow. 
210 211 class HexStringToIntTraits { 212 public: 213 typedef std::string string_type; 214 typedef int value_type; 215 static const int kBase = 16; 216 static inline value_type convert_func(const string_type::value_type* str, 217 string_type::value_type** endptr) { 218 return strtoui(str, endptr, kBase); 219 } 220 static inline bool valid_func(const string_type& str) { 221 return !str.empty() && !isspace(str[0]); 222 } 223 }; 224 225 class HexString16ToIntTraits { 226 public: 227 typedef string16 string_type; 228 typedef int value_type; 229 static const int kBase = 16; 230 static inline value_type convert_func(const string_type::value_type* str, 231 string_type::value_type** endptr) { 232 #if defined(WCHAR_T_IS_UTF16) 233 return wcstoul(str, endptr, kBase); 234 #elif defined(WCHAR_T_IS_UTF32) 235 std::string ascii_string = UTF16ToASCII(string16(str)); 236 char* ascii_end = NULL; 237 value_type ret = strtoui(ascii_string.c_str(), &ascii_end, kBase); 238 if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 239 *endptr = 240 const_cast<string_type::value_type*>(str) + ascii_string.length(); 241 } 242 return ret; 243 #endif 244 } 245 static inline bool valid_func(const string_type& str) { 246 return !str.empty() && !iswspace(str[0]); 247 } 248 }; 249 250 class StringToDoubleTraits { 251 public: 252 typedef std::string string_type; 253 typedef double value_type; 254 static inline value_type convert_func(const string_type::value_type* str, 255 string_type::value_type** endptr) { 256 return dmg_fp::strtod(str, endptr); 257 } 258 static inline bool valid_func(const string_type& str) { 259 return !str.empty() && !isspace(str[0]); 260 } 261 }; 262 263 class String16ToDoubleTraits { 264 public: 265 typedef string16 string_type; 266 typedef double value_type; 267 static inline value_type convert_func(const string_type::value_type* str, 268 string_type::value_type** endptr) { 269 // Because dmg_fp::strtod does not like char16, we convert it to ASCII. 
270 // In theory, this should be safe, but it's possible that 16-bit chars 271 // might get ignored by accident causing something to be parsed when it 272 // shouldn't. 273 std::string ascii_string = UTF16ToASCII(string16(str)); 274 char* ascii_end = NULL; 275 value_type ret = dmg_fp::strtod(ascii_string.c_str(), &ascii_end); 276 if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 277 // Put endptr at end of input string, so it's not recognized as an error. 278 *endptr = 279 const_cast<string_type::value_type*>(str) + ascii_string.length(); 280 } 281 282 return ret; 283 } 284 static inline bool valid_func(const string_type& str) { 285 return !str.empty() && !iswspace(str[0]); 286 } 287 }; 288 289 } // namespace 290 291 292 namespace base { 293 294 bool IsWprintfFormatPortable(const wchar_t* format) { 295 for (const wchar_t* position = format; *position != '\0'; ++position) { 296 if (*position == '%') { 297 bool in_specification = true; 298 bool modifier_l = false; 299 while (in_specification) { 300 // Eat up characters until reaching a known specifier. 301 if (*++position == '\0') { 302 // The format string ended in the middle of a specification. Call 303 // it portable because no unportable specifications were found. The 304 // string is equally broken on all platforms. 305 return true; 306 } 307 308 if (*position == 'l') { 309 // 'l' is the only thing that can save the 's' and 'c' specifiers. 310 modifier_l = true; 311 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 312 *position == 'S' || *position == 'C' || *position == 'F' || 313 *position == 'D' || *position == 'O' || *position == 'U') { 314 // Not portable. 315 return false; 316 } 317 318 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 319 // Portable, keep scanning the rest of the format string. 
320 in_specification = false; 321 } 322 } 323 } 324 } 325 326 return true; 327 } 328 329 330 } // namespace base 331 332 333 const std::string& EmptyString() { 334 return Singleton<EmptyStrings>::get()->s; 335 } 336 337 const std::wstring& EmptyWString() { 338 return Singleton<EmptyStrings>::get()->ws; 339 } 340 341 const string16& EmptyString16() { 342 return Singleton<EmptyStrings>::get()->s16; 343 } 344 345 #define WHITESPACE_UNICODE \ 346 0x0009, /* <control-0009> to <control-000D> */ \ 347 0x000A, \ 348 0x000B, \ 349 0x000C, \ 350 0x000D, \ 351 0x0020, /* Space */ \ 352 0x0085, /* <control-0085> */ \ 353 0x00A0, /* No-Break Space */ \ 354 0x1680, /* Ogham Space Mark */ \ 355 0x180E, /* Mongolian Vowel Separator */ \ 356 0x2000, /* En Quad to Hair Space */ \ 357 0x2001, \ 358 0x2002, \ 359 0x2003, \ 360 0x2004, \ 361 0x2005, \ 362 0x2006, \ 363 0x2007, \ 364 0x2008, \ 365 0x2009, \ 366 0x200A, \ 367 0x200C, /* Zero Width Non-Joiner */ \ 368 0x2028, /* Line Separator */ \ 369 0x2029, /* Paragraph Separator */ \ 370 0x202F, /* Narrow No-Break Space */ \ 371 0x205F, /* Medium Mathematical Space */ \ 372 0x3000, /* Ideographic Space */ \ 373 0 374 375 const wchar_t kWhitespaceWide[] = { 376 WHITESPACE_UNICODE 377 }; 378 const char16 kWhitespaceUTF16[] = { 379 WHITESPACE_UNICODE 380 }; 381 const char kWhitespaceASCII[] = { 382 0x09, // <control-0009> to <control-000D> 383 0x0A, 384 0x0B, 385 0x0C, 386 0x0D, 387 0x20, // Space 388 0 389 }; 390 391 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; 392 393 template<typename STR> 394 TrimPositions TrimStringT(const STR& input, 395 const typename STR::value_type trim_chars[], 396 TrimPositions positions, 397 STR* output) { 398 // Find the edges of leading/trailing whitespace as desired. 399 const typename STR::size_type last_char = input.length() - 1; 400 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 
401 input.find_first_not_of(trim_chars) : 0; 402 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 403 input.find_last_not_of(trim_chars) : last_char; 404 405 // When the string was all whitespace, report that we stripped off whitespace 406 // from whichever position the caller was interested in. For empty input, we 407 // stripped no whitespace, but we still need to clear |output|. 408 if (input.empty() || 409 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 410 bool input_was_empty = input.empty(); // in case output == &input 411 output->clear(); 412 return input_was_empty ? TRIM_NONE : positions; 413 } 414 415 // Trim the whitespace. 416 *output = 417 input.substr(first_good_char, last_good_char - first_good_char + 1); 418 419 // Return where we trimmed from. 420 return static_cast<TrimPositions>( 421 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 422 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 423 } 424 425 bool TrimString(const std::wstring& input, 426 const wchar_t trim_chars[], 427 std::wstring* output) { 428 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 429 } 430 431 #if !defined(WCHAR_T_IS_UTF16) 432 bool TrimString(const string16& input, 433 const char16 trim_chars[], 434 string16* output) { 435 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 436 } 437 #endif 438 439 bool TrimString(const std::string& input, 440 const char trim_chars[], 441 std::string* output) { 442 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 443 } 444 445 TrimPositions TrimWhitespace(const std::wstring& input, 446 TrimPositions positions, 447 std::wstring* output) { 448 return TrimStringT(input, kWhitespaceWide, positions, output); 449 } 450 451 #if !defined(WCHAR_T_IS_UTF16) 452 TrimPositions TrimWhitespace(const string16& input, 453 TrimPositions positions, 454 string16* output) { 455 return TrimStringT(input, kWhitespaceUTF16, positions, 
output); 456 } 457 #endif 458 459 TrimPositions TrimWhitespaceASCII(const std::string& input, 460 TrimPositions positions, 461 std::string* output) { 462 return TrimStringT(input, kWhitespaceASCII, positions, output); 463 } 464 465 // This function is only for backward-compatibility. 466 // To be removed when all callers are updated. 467 TrimPositions TrimWhitespace(const std::string& input, 468 TrimPositions positions, 469 std::string* output) { 470 return TrimWhitespaceASCII(input, positions, output); 471 } 472 473 template<typename STR> 474 STR CollapseWhitespaceT(const STR& text, 475 bool trim_sequences_with_line_breaks) { 476 STR result; 477 result.resize(text.size()); 478 479 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 480 // will trim any leading whitespace. 481 bool in_whitespace = true; 482 bool already_trimmed = true; 483 484 int chars_written = 0; 485 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 486 if (IsWhitespace(*i)) { 487 if (!in_whitespace) { 488 // Reduce all whitespace sequences to a single space. 489 in_whitespace = true; 490 result[chars_written++] = L' '; 491 } 492 if (trim_sequences_with_line_breaks && !already_trimmed && 493 ((*i == '\n') || (*i == '\r'))) { 494 // Whitespace sequences containing CR or LF are eliminated entirely. 495 already_trimmed = true; 496 --chars_written; 497 } 498 } else { 499 // Non-whitespace chracters are copied straight across. 500 in_whitespace = false; 501 already_trimmed = false; 502 result[chars_written++] = *i; 503 } 504 } 505 506 if (in_whitespace && !already_trimmed) { 507 // Any trailing whitespace is eliminated. 
508 --chars_written; 509 } 510 511 result.resize(chars_written); 512 return result; 513 } 514 515 std::wstring CollapseWhitespace(const std::wstring& text, 516 bool trim_sequences_with_line_breaks) { 517 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 518 } 519 520 #if !defined(WCHAR_T_IS_UTF16) 521 string16 CollapseWhitespace(const string16& text, 522 bool trim_sequences_with_line_breaks) { 523 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 524 } 525 #endif 526 527 std::string CollapseWhitespaceASCII(const std::string& text, 528 bool trim_sequences_with_line_breaks) { 529 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 530 } 531 532 bool ContainsOnlyWhitespaceASCII(const std::string& str) { 533 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { 534 if (!IsAsciiWhitespace(*i)) 535 return false; 536 } 537 return true; 538 } 539 540 bool ContainsOnlyWhitespace(const string16& str) { 541 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) { 542 if (!IsWhitespace(*i)) 543 return false; 544 } 545 return true; 546 } 547 548 std::string WideToASCII(const std::wstring& wide) { 549 DCHECK(IsStringASCII(wide)) << wide; 550 return std::string(wide.begin(), wide.end()); 551 } 552 553 std::wstring ASCIIToWide(const base::StringPiece& ascii) { 554 DCHECK(IsStringASCII(ascii)) << ascii; 555 return std::wstring(ascii.begin(), ascii.end()); 556 } 557 558 std::string UTF16ToASCII(const string16& utf16) { 559 DCHECK(IsStringASCII(utf16)) << utf16; 560 return std::string(utf16.begin(), utf16.end()); 561 } 562 563 string16 ASCIIToUTF16(const base::StringPiece& ascii) { 564 DCHECK(IsStringASCII(ascii)) << ascii; 565 return string16(ascii.begin(), ascii.end()); 566 } 567 568 // Latin1 is just the low range of Unicode, so we can copy directly to convert. 
569 bool WideToLatin1(const std::wstring& wide, std::string* latin1) { 570 std::string output; 571 output.resize(wide.size()); 572 latin1->clear(); 573 for (size_t i = 0; i < wide.size(); i++) { 574 if (wide[i] > 255) 575 return false; 576 output[i] = static_cast<char>(wide[i]); 577 } 578 latin1->swap(output); 579 return true; 580 } 581 582 bool IsString8Bit(const std::wstring& str) { 583 for (size_t i = 0; i < str.length(); i++) { 584 if (str[i] > 255) 585 return false; 586 } 587 return true; 588 } 589 590 template<class STR> 591 static bool DoIsStringASCII(const STR& str) { 592 for (size_t i = 0; i < str.length(); i++) { 593 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 594 if (c > 0x7F) 595 return false; 596 } 597 return true; 598 } 599 600 bool IsStringASCII(const std::wstring& str) { 601 return DoIsStringASCII(str); 602 } 603 604 #if !defined(WCHAR_T_IS_UTF16) 605 bool IsStringASCII(const string16& str) { 606 return DoIsStringASCII(str); 607 } 608 #endif 609 610 bool IsStringASCII(const base::StringPiece& str) { 611 return DoIsStringASCII(str); 612 } 613 614 // Helper functions that determine whether the given character begins a 615 // UTF-8 sequence of bytes with the given length. A character satisfies 616 // "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte 617 // character. 618 static inline bool IsBegin2ByteUTF8(int c) { 619 return (c & 0xE0) == 0xC0; 620 } 621 static inline bool IsBegin3ByteUTF8(int c) { 622 return (c & 0xF0) == 0xE0; 623 } 624 static inline bool IsBegin4ByteUTF8(int c) { 625 return (c & 0xF8) == 0xF0; 626 } 627 static inline bool IsInUTF8Sequence(int c) { 628 return (c & 0xC0) == 0x80; 629 } 630 631 // This function was copied from Mozilla, with modifications. The original code 632 // was 'IsUTF8' in xpcom/string/src/nsReadableUtils.cpp. 
The license block for 633 // this function is: 634 // This function subject to the Mozilla Public License Version 635 // 1.1 (the "License"); you may not use this code except in compliance with 636 // the License. You may obtain a copy of the License at 637 // http://www.mozilla.org/MPL/ 638 // 639 // Software distributed under the License is distributed on an "AS IS" basis, 640 // WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 641 // for the specific language governing rights and limitations under the 642 // License. 643 // 644 // The Original Code is mozilla.org code. 645 // 646 // The Initial Developer of the Original Code is 647 // Netscape Communications Corporation. 648 // Portions created by the Initial Developer are Copyright (C) 2000 649 // the Initial Developer. All Rights Reserved. 650 // 651 // Contributor(s): 652 // Scott Collins <scc (at) mozilla.org> (original author) 653 // 654 // This is a template so that it can be run on wide and 8-bit strings. We want 655 // to run it on wide strings when we have input that we think may have 656 // originally been UTF-8, but has been converted to wide characters because 657 // that's what we (and Windows) use internally. 658 template<typename CHAR> 659 static bool IsStringUTF8T(const CHAR* str, size_t length) { 660 bool overlong = false; 661 bool surrogate = false; 662 bool nonchar = false; 663 664 // overlong byte upper bound 665 typename ToUnsigned<CHAR>::Unsigned olupper = 0; 666 667 // surrogate byte lower bound 668 typename ToUnsigned<CHAR>::Unsigned slower = 0; 669 670 // incremented when inside a multi-byte char to indicate how many bytes 671 // are left in the sequence 672 int positions_left = 0; 673 674 for (uintptr_t i = 0; i < length; i++) { 675 // This whole function assume an unsigned value so force its conversion to 676 // an unsigned value. 
677 typename ToUnsigned<CHAR>::Unsigned c = str[i]; 678 if (c < 0x80) 679 continue; // ASCII 680 681 if (c <= 0xC1) { 682 // [80-BF] where not expected, [C0-C1] for overlong 683 return false; 684 } else if (IsBegin2ByteUTF8(c)) { 685 positions_left = 1; 686 } else if (IsBegin3ByteUTF8(c)) { 687 positions_left = 2; 688 if (c == 0xE0) { 689 // to exclude E0[80-9F][80-BF] 690 overlong = true; 691 olupper = 0x9F; 692 } else if (c == 0xED) { 693 // ED[A0-BF][80-BF]: surrogate codepoint 694 surrogate = true; 695 slower = 0xA0; 696 } else if (c == 0xEF) { 697 // EF BF [BE-BF] : non-character 698 // TODO(jungshik): EF B7 [90-AF] should be checked as well. 699 nonchar = true; 700 } 701 } else if (c <= 0xF4) { 702 positions_left = 3; 703 nonchar = true; 704 if (c == 0xF0) { 705 // to exclude F0[80-8F][80-BF]{2} 706 overlong = true; 707 olupper = 0x8F; 708 } else if (c == 0xF4) { 709 // to exclude F4[90-BF][80-BF] 710 // actually not surrogates but codepoints beyond 0x10FFFF 711 surrogate = true; 712 slower = 0x90; 713 } 714 } else { 715 return false; 716 } 717 718 // eat the rest of this multi-byte character 719 while (positions_left) { 720 positions_left--; 721 i++; 722 c = str[i]; 723 if (!c) 724 return false; // end of string but not end of character sequence 725 726 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] 727 if (nonchar && ((!positions_left && c < 0xBE) || 728 (positions_left == 1 && c != 0xBF) || 729 (positions_left == 2 && 0x0F != (0x0F & c) ))) { 730 nonchar = false; 731 } 732 if (!IsInUTF8Sequence(c) || (overlong && c <= olupper) || 733 (surrogate && slower <= c) || (nonchar && !positions_left) ) { 734 return false; 735 } 736 overlong = surrogate = false; 737 } 738 } 739 return true; 740 } 741 742 bool IsStringUTF8(const std::string& str) { 743 return IsStringUTF8T(str.data(), str.length()); 744 } 745 746 bool IsStringWideUTF8(const std::wstring& str) { 747 return IsStringUTF8T(str.data(), str.length()); 748 } 749 750 template<typename Iter> 
751 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 752 Iter a_end, 753 const char* b) { 754 for (Iter it = a_begin; it != a_end; ++it, ++b) { 755 if (!*b || ToLowerASCII(*it) != *b) 756 return false; 757 } 758 return *b == 0; 759 } 760 761 // Front-ends for LowerCaseEqualsASCII. 762 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 763 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 764 } 765 766 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { 767 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 768 } 769 770 #if !defined(WCHAR_T_IS_UTF16) 771 bool LowerCaseEqualsASCII(const string16& a, const char* b) { 772 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 773 } 774 #endif 775 776 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 777 std::string::const_iterator a_end, 778 const char* b) { 779 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 780 } 781 782 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, 783 std::wstring::const_iterator a_end, 784 const char* b) { 785 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 786 } 787 788 #if !defined(WCHAR_T_IS_UTF16) 789 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 790 string16::const_iterator a_end, 791 const char* b) { 792 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 793 } 794 #endif 795 796 bool LowerCaseEqualsASCII(const char* a_begin, 797 const char* a_end, 798 const char* b) { 799 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 800 } 801 802 bool LowerCaseEqualsASCII(const wchar_t* a_begin, 803 const wchar_t* a_end, 804 const char* b) { 805 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 806 } 807 808 #if !defined(WCHAR_T_IS_UTF16) 809 bool LowerCaseEqualsASCII(const char16* a_begin, 810 const char16* a_end, 811 const char* b) { 812 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 813 } 814 #endif 815 816 bool EqualsASCII(const string16& a, const base::StringPiece& b) { 817 if (a.length() != b.length()) 818 
return false; 819 return std::equal(b.begin(), b.end(), a.begin()); 820 } 821 822 bool StartsWithASCII(const std::string& str, 823 const std::string& search, 824 bool case_sensitive) { 825 if (case_sensitive) 826 return str.compare(0, search.length(), search) == 0; 827 else 828 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 829 } 830 831 template <typename STR> 832 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 833 if (case_sensitive) { 834 return str.compare(0, search.length(), search) == 0; 835 } else { 836 if (search.size() > str.size()) 837 return false; 838 return std::equal(search.begin(), search.end(), str.begin(), 839 CaseInsensitiveCompare<typename STR::value_type>()); 840 } 841 } 842 843 bool StartsWith(const std::wstring& str, const std::wstring& search, 844 bool case_sensitive) { 845 return StartsWithT(str, search, case_sensitive); 846 } 847 848 #if !defined(WCHAR_T_IS_UTF16) 849 bool StartsWith(const string16& str, const string16& search, 850 bool case_sensitive) { 851 return StartsWithT(str, search, case_sensitive); 852 } 853 #endif 854 855 template <typename STR> 856 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 857 typename STR::size_type str_length = str.length(); 858 typename STR::size_type search_length = search.length(); 859 if (search_length > str_length) 860 return false; 861 if (case_sensitive) { 862 return str.compare(str_length - search_length, search_length, search) == 0; 863 } else { 864 return std::equal(search.begin(), search.end(), 865 str.begin() + (str_length - search_length), 866 CaseInsensitiveCompare<typename STR::value_type>()); 867 } 868 } 869 870 bool EndsWith(const std::string& str, const std::string& search, 871 bool case_sensitive) { 872 return EndsWithT(str, search, case_sensitive); 873 } 874 875 bool EndsWith(const std::wstring& str, const std::wstring& search, 876 bool case_sensitive) { 877 return EndsWithT(str, search, case_sensitive); 
878 } 879 880 #if !defined(WCHAR_T_IS_UTF16) 881 bool EndsWith(const string16& str, const string16& search, 882 bool case_sensitive) { 883 return EndsWithT(str, search, case_sensitive); 884 } 885 #endif 886 887 DataUnits GetByteDisplayUnits(int64 bytes) { 888 // The byte thresholds at which we display amounts. A byte count is displayed 889 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1]. 890 // This must match the DataUnits enum. 891 static const int64 kUnitThresholds[] = { 892 0, // DATA_UNITS_BYTE, 893 3*1024, // DATA_UNITS_KIBIBYTE, 894 2*1024*1024, // DATA_UNITS_MEBIBYTE, 895 1024*1024*1024 // DATA_UNITS_GIBIBYTE, 896 }; 897 898 if (bytes < 0) { 899 NOTREACHED() << "Negative bytes value"; 900 return DATA_UNITS_BYTE; 901 } 902 903 int unit_index = arraysize(kUnitThresholds); 904 while (--unit_index > 0) { 905 if (bytes >= kUnitThresholds[unit_index]) 906 break; 907 } 908 909 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE); 910 return DataUnits(unit_index); 911 } 912 913 // TODO(mpcomplete): deal with locale 914 // Byte suffixes. This must match the DataUnits enum. 915 static const wchar_t* const kByteStrings[] = { 916 L"B", 917 L"kB", 918 L"MB", 919 L"GB" 920 }; 921 922 static const wchar_t* const kSpeedStrings[] = { 923 L"B/s", 924 L"kB/s", 925 L"MB/s", 926 L"GB/s" 927 }; 928 929 std::wstring FormatBytesInternal(int64 bytes, 930 DataUnits units, 931 bool show_units, 932 const wchar_t* const* suffix) { 933 if (bytes < 0) { 934 NOTREACHED() << "Negative bytes value"; 935 return std::wstring(); 936 } 937 938 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE); 939 940 // Put the quantity in the right units. 941 double unit_amount = static_cast<double>(bytes); 942 for (int i = 0; i < units; ++i) 943 unit_amount /= 1024.0; 944 945 wchar_t tmp[64]; 946 // If the first decimal digit is 0, don't show it. 
947 double int_part; 948 double fractional_part = modf(unit_amount, &int_part); 949 modf(fractional_part * 10, &int_part); 950 if (int_part == 0) { 951 base::swprintf(tmp, arraysize(tmp), 952 L"%lld", static_cast<int64>(unit_amount)); 953 } else { 954 base::swprintf(tmp, arraysize(tmp), L"%.1lf", unit_amount); 955 } 956 957 std::wstring ret(tmp); 958 if (show_units) { 959 ret += L" "; 960 ret += suffix[units]; 961 } 962 963 return ret; 964 } 965 966 std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) { 967 return FormatBytesInternal(bytes, units, show_units, kByteStrings); 968 } 969 970 std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) { 971 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings); 972 } 973 974 template<class StringType> 975 void DoReplaceSubstringsAfterOffset(StringType* str, 976 typename StringType::size_type start_offset, 977 const StringType& find_this, 978 const StringType& replace_with, 979 bool replace_all) { 980 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 981 return; 982 983 DCHECK(!find_this.empty()); 984 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 985 offs != StringType::npos; offs = str->find(find_this, offs)) { 986 str->replace(offs, find_this.length(), replace_with); 987 offs += replace_with.length(); 988 989 if (!replace_all) 990 break; 991 } 992 } 993 994 void ReplaceFirstSubstringAfterOffset(string16* str, 995 string16::size_type start_offset, 996 const string16& find_this, 997 const string16& replace_with) { 998 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 999 false); // replace first instance 1000 } 1001 1002 void ReplaceFirstSubstringAfterOffset(std::string* str, 1003 std::string::size_type start_offset, 1004 const std::string& find_this, 1005 const std::string& replace_with) { 1006 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 1007 false); // replace 
first instance 1008 } 1009 1010 void ReplaceSubstringsAfterOffset(string16* str, 1011 string16::size_type start_offset, 1012 const string16& find_this, 1013 const string16& replace_with) { 1014 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 1015 true); // replace all instances 1016 } 1017 1018 void ReplaceSubstringsAfterOffset(std::string* str, 1019 std::string::size_type start_offset, 1020 const std::string& find_this, 1021 const std::string& replace_with) { 1022 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 1023 true); // replace all instances 1024 } 1025 1026 // Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter 1027 // is the size of the buffer. These return the number of characters in the 1028 // formatted string excluding the NUL terminator. If the buffer is not 1029 // large enough to accommodate the formatted string without truncation, they 1030 // return the number of characters that would be in the fully-formatted string 1031 // (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms). 1032 inline int vsnprintfT(char* buffer, 1033 size_t buf_size, 1034 const char* format, 1035 va_list argptr) { 1036 return base::vsnprintf(buffer, buf_size, format, argptr); 1037 } 1038 1039 inline int vsnprintfT(wchar_t* buffer, 1040 size_t buf_size, 1041 const wchar_t* format, 1042 va_list argptr) { 1043 return base::vswprintf(buffer, buf_size, format, argptr); 1044 } 1045 1046 // Templatized backend for StringPrintF/StringAppendF. This does not finalize 1047 // the va_list, the caller is expected to do that. 1048 template <class StringType> 1049 static void StringAppendVT(StringType* dst, 1050 const typename StringType::value_type* format, 1051 va_list ap) { 1052 // First try with a small fixed size buffer. 1053 // This buffer size should be kept in sync with StringUtilTest.GrowBoundary 1054 // and StringUtilTest.StringPrintfBounds. 
1055 typename StringType::value_type stack_buf[1024]; 1056 1057 va_list ap_copy; 1058 GG_VA_COPY(ap_copy, ap); 1059 1060 #if !defined(OS_WIN) 1061 errno = 0; 1062 #endif 1063 int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy); 1064 va_end(ap_copy); 1065 1066 if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) { 1067 // It fit. 1068 dst->append(stack_buf, result); 1069 return; 1070 } 1071 1072 // Repeatedly increase buffer size until it fits. 1073 int mem_length = arraysize(stack_buf); 1074 while (true) { 1075 if (result < 0) { 1076 #if !defined(OS_WIN) 1077 // On Windows, vsnprintfT always returns the number of characters in a 1078 // fully-formatted string, so if we reach this point, something else is 1079 // wrong and no amount of buffer-doubling is going to fix it. 1080 if (errno != 0 && errno != EOVERFLOW) 1081 #endif 1082 { 1083 // If an error other than overflow occurred, it's never going to work. 1084 DLOG(WARNING) << "Unable to printf the requested string due to error."; 1085 return; 1086 } 1087 // Try doubling the buffer size. 1088 mem_length *= 2; 1089 } else { 1090 // We need exactly "result + 1" characters. 1091 mem_length = result + 1; 1092 } 1093 1094 if (mem_length > 32 * 1024 * 1024) { 1095 // That should be plenty, don't try anything larger. This protects 1096 // against huge allocations when using vsnprintfT implementations that 1097 // return -1 for reasons other than overflow without setting errno. 1098 DLOG(WARNING) << "Unable to printf the requested string due to size."; 1099 return; 1100 } 1101 1102 std::vector<typename StringType::value_type> mem_buf(mem_length); 1103 1104 // NOTE: You can only use a va_list once. Since we're in a while loop, we 1105 // need to make a new copy each time so we don't use up the original. 
1106 GG_VA_COPY(ap_copy, ap); 1107 result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy); 1108 va_end(ap_copy); 1109 1110 if ((result >= 0) && (result < mem_length)) { 1111 // It fit. 1112 dst->append(&mem_buf[0], result); 1113 return; 1114 } 1115 } 1116 } 1117 1118 namespace { 1119 1120 template <typename STR, typename INT, typename UINT, bool NEG> 1121 struct IntToStringT { 1122 // This is to avoid a compiler warning about unary minus on unsigned type. 1123 // For example, say you had the following code: 1124 // template <typename INT> 1125 // INT abs(INT value) { return value < 0 ? -value : value; } 1126 // Even though if INT is unsigned, it's impossible for value < 0, so the 1127 // unary minus will never be taken, the compiler will still generate a 1128 // warning. We do a little specialization dance... 1129 template <typename INT2, typename UINT2, bool NEG2> 1130 struct ToUnsignedT { }; 1131 1132 template <typename INT2, typename UINT2> 1133 struct ToUnsignedT<INT2, UINT2, false> { 1134 static UINT2 ToUnsigned(INT2 value) { 1135 return static_cast<UINT2>(value); 1136 } 1137 }; 1138 1139 template <typename INT2, typename UINT2> 1140 struct ToUnsignedT<INT2, UINT2, true> { 1141 static UINT2 ToUnsigned(INT2 value) { 1142 return static_cast<UINT2>(value < 0 ? -value : value); 1143 } 1144 }; 1145 1146 static STR IntToString(INT value) { 1147 // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. 1148 // So round up to allocate 3 output characters per byte, plus 1 for '-'. 1149 const int kOutputBufSize = 3 * sizeof(INT) + 1; 1150 1151 // Allocate the whole string right away, we will right back to front, and 1152 // then return the substr of what we ended up using. 1153 STR outbuf(kOutputBufSize, 0); 1154 1155 bool is_neg = value < 0; 1156 // Even though is_neg will never be true when INT is parameterized as 1157 // unsigned, even the presence of the unary operation causes a warning. 
1158 UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value); 1159 1160 for (typename STR::iterator it = outbuf.end();;) { 1161 --it; 1162 DCHECK(it != outbuf.begin()); 1163 *it = static_cast<typename STR::value_type>((res % 10) + '0'); 1164 res /= 10; 1165 1166 // We're done.. 1167 if (res == 0) { 1168 if (is_neg) { 1169 --it; 1170 DCHECK(it != outbuf.begin()); 1171 *it = static_cast<typename STR::value_type>('-'); 1172 } 1173 return STR(it, outbuf.end()); 1174 } 1175 } 1176 NOTREACHED(); 1177 return STR(); 1178 } 1179 }; 1180 1181 } 1182 1183 std::string IntToString(int value) { 1184 return IntToStringT<std::string, int, unsigned int, true>:: 1185 IntToString(value); 1186 } 1187 std::wstring IntToWString(int value) { 1188 return IntToStringT<std::wstring, int, unsigned int, true>:: 1189 IntToString(value); 1190 } 1191 string16 IntToString16(int value) { 1192 return IntToStringT<string16, int, unsigned int, true>:: 1193 IntToString(value); 1194 } 1195 std::string UintToString(unsigned int value) { 1196 return IntToStringT<std::string, unsigned int, unsigned int, false>:: 1197 IntToString(value); 1198 } 1199 std::wstring UintToWString(unsigned int value) { 1200 return IntToStringT<std::wstring, unsigned int, unsigned int, false>:: 1201 IntToString(value); 1202 } 1203 string16 UintToString16(unsigned int value) { 1204 return IntToStringT<string16, unsigned int, unsigned int, false>:: 1205 IntToString(value); 1206 } 1207 std::string Int64ToString(int64 value) { 1208 return IntToStringT<std::string, int64, uint64, true>:: 1209 IntToString(value); 1210 } 1211 std::wstring Int64ToWString(int64 value) { 1212 return IntToStringT<std::wstring, int64, uint64, true>:: 1213 IntToString(value); 1214 } 1215 std::string Uint64ToString(uint64 value) { 1216 return IntToStringT<std::string, uint64, uint64, false>:: 1217 IntToString(value); 1218 } 1219 std::wstring Uint64ToWString(uint64 value) { 1220 return IntToStringT<std::wstring, uint64, uint64, false>:: 1221 
IntToString(value); 1222 } 1223 1224 std::string DoubleToString(double value) { 1225 // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. 1226 char buffer[32]; 1227 dmg_fp::g_fmt(buffer, value); 1228 return std::string(buffer); 1229 } 1230 1231 std::wstring DoubleToWString(double value) { 1232 return ASCIIToWide(DoubleToString(value)); 1233 } 1234 1235 void StringAppendV(std::string* dst, const char* format, va_list ap) { 1236 StringAppendVT(dst, format, ap); 1237 } 1238 1239 void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { 1240 StringAppendVT(dst, format, ap); 1241 } 1242 1243 std::string StringPrintf(const char* format, ...) { 1244 va_list ap; 1245 va_start(ap, format); 1246 std::string result; 1247 StringAppendV(&result, format, ap); 1248 va_end(ap); 1249 return result; 1250 } 1251 1252 std::wstring StringPrintf(const wchar_t* format, ...) { 1253 va_list ap; 1254 va_start(ap, format); 1255 std::wstring result; 1256 StringAppendV(&result, format, ap); 1257 va_end(ap); 1258 return result; 1259 } 1260 1261 std::string StringPrintV(const char* format, va_list ap) { 1262 std::string result; 1263 StringAppendV(&result, format, ap); 1264 return result; 1265 } 1266 1267 const std::string& SStringPrintf(std::string* dst, const char* format, ...) { 1268 va_list ap; 1269 va_start(ap, format); 1270 dst->clear(); 1271 StringAppendV(dst, format, ap); 1272 va_end(ap); 1273 return *dst; 1274 } 1275 1276 const std::wstring& SStringPrintf(std::wstring* dst, 1277 const wchar_t* format, ...) { 1278 va_list ap; 1279 va_start(ap, format); 1280 dst->clear(); 1281 StringAppendV(dst, format, ap); 1282 va_end(ap); 1283 return *dst; 1284 } 1285 1286 void StringAppendF(std::string* dst, const char* format, ...) { 1287 va_list ap; 1288 va_start(ap, format); 1289 StringAppendV(dst, format, ap); 1290 va_end(ap); 1291 } 1292 1293 void StringAppendF(std::wstring* dst, const wchar_t* format, ...) 
{ 1294 va_list ap; 1295 va_start(ap, format); 1296 StringAppendV(dst, format, ap); 1297 va_end(ap); 1298 } 1299 1300 template<typename STR> 1301 static void SplitStringT(const STR& str, 1302 const typename STR::value_type s, 1303 bool trim_whitespace, 1304 std::vector<STR>* r) { 1305 size_t last = 0; 1306 size_t i; 1307 size_t c = str.size(); 1308 for (i = 0; i <= c; ++i) { 1309 if (i == c || str[i] == s) { 1310 size_t len = i - last; 1311 STR tmp = str.substr(last, len); 1312 if (trim_whitespace) { 1313 STR t_tmp; 1314 TrimWhitespace(tmp, TRIM_ALL, &t_tmp); 1315 r->push_back(t_tmp); 1316 } else { 1317 r->push_back(tmp); 1318 } 1319 last = i + 1; 1320 } 1321 } 1322 } 1323 1324 void SplitString(const std::wstring& str, 1325 wchar_t s, 1326 std::vector<std::wstring>* r) { 1327 SplitStringT(str, s, true, r); 1328 } 1329 1330 #if !defined(WCHAR_T_IS_UTF16) 1331 void SplitString(const string16& str, 1332 char16 s, 1333 std::vector<string16>* r) { 1334 SplitStringT(str, s, true, r); 1335 } 1336 #endif 1337 1338 void SplitString(const std::string& str, 1339 char s, 1340 std::vector<std::string>* r) { 1341 SplitStringT(str, s, true, r); 1342 } 1343 1344 void SplitStringDontTrim(const std::wstring& str, 1345 wchar_t s, 1346 std::vector<std::wstring>* r) { 1347 SplitStringT(str, s, false, r); 1348 } 1349 1350 #if !defined(WCHAR_T_IS_UTF16) 1351 void SplitStringDontTrim(const string16& str, 1352 char16 s, 1353 std::vector<string16>* r) { 1354 SplitStringT(str, s, false, r); 1355 } 1356 #endif 1357 1358 void SplitStringDontTrim(const std::string& str, 1359 char s, 1360 std::vector<std::string>* r) { 1361 SplitStringT(str, s, false, r); 1362 } 1363 1364 template<typename STR> 1365 static size_t TokenizeT(const STR& str, 1366 const STR& delimiters, 1367 std::vector<STR>* tokens) { 1368 tokens->clear(); 1369 1370 typename STR::size_type start = str.find_first_not_of(delimiters); 1371 while (start != STR::npos) { 1372 typename STR::size_type end = str.find_first_of(delimiters, 
// Returns true if |c| is whitespace as HTML 5 defines it: space, tab, LF,
// line tab, FF, or CR.
template<typename CHAR>
static bool IsHTML5WhitespaceT(CHAR c) {
  switch (c) {
    case L' ':
    case L'\t':
    case L'\xA':
    case L'\xB':
    case L'\xC':
    case L'\xD':
      return true;
    default:
      return false;
  }
}

// Appends each maximal run of non-whitespace characters in |str| to
// |*result|, in order of appearance. |*result| is not cleared first, and
// nothing is appended for an empty or all-whitespace input.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  const size_t length = str.length();
  size_t pos = 0;

  while (pos < length) {
    // Skip the whitespace in front of the next token.
    while (pos < length && IsHTML5WhitespaceT(str[pos]))
      ++pos;
    if (pos == length)
      break;

    // Consume the token itself.
    const size_t token_start = pos;
    while (pos < length && !IsHTML5WhitespaceT(str[pos]))
      ++pos;

    result->push_back(str.substr(token_start, pos - token_start));
  }
}
if (i + 1 != format_string.end()) { 1513 ++i; 1514 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 1515 if ('$' == *i) { 1516 formatted.push_back('$'); 1517 } else { 1518 uintptr_t index = *i - '1'; 1519 if (offsets) { 1520 ReplacementOffset r_offset(index, 1521 static_cast<int>(formatted.size())); 1522 r_offsets.insert(std::lower_bound(r_offsets.begin(), 1523 r_offsets.end(), r_offset, 1524 &CompareParameter), 1525 r_offset); 1526 } 1527 if (index < substitutions) 1528 formatted.append(subst.at(index)); 1529 } 1530 } 1531 } else { 1532 formatted.push_back(*i); 1533 } 1534 } 1535 if (offsets) { 1536 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 1537 i != r_offsets.end(); ++i) { 1538 offsets->push_back(i->offset); 1539 } 1540 } 1541 return formatted; 1542 } 1543 1544 string16 ReplaceStringPlaceholders(const string16& format_string, 1545 const std::vector<string16>& subst, 1546 std::vector<size_t>* offsets) { 1547 return DoReplaceStringPlaceholders(format_string, subst, offsets); 1548 } 1549 1550 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 1551 const std::vector<std::string>& subst, 1552 std::vector<size_t>* offsets) { 1553 return DoReplaceStringPlaceholders(format_string, subst, offsets); 1554 } 1555 1556 string16 ReplaceStringPlaceholders(const string16& format_string, 1557 const string16& a, 1558 size_t* offset) { 1559 std::vector<size_t> offsets; 1560 std::vector<string16> subst; 1561 subst.push_back(a); 1562 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 1563 1564 DCHECK(offsets.size() == 1); 1565 if (offset) { 1566 *offset = offsets[0]; 1567 } 1568 return result; 1569 } 1570 1571 template <class CHAR> 1572 static bool IsWildcard(CHAR character) { 1573 return character == '*' || character == '?'; 1574 } 1575 1576 // Move the strings pointers to the point where they start to differ. 
// Recursive wildcard matcher: returns true if the NUL-terminated string
// |eval| matches the NUL-terminated |pattern|. A '?' in the pattern is tried
// against zero characters and against one character; a '*' is tried against
// every suffix of the remaining string. |depth| is the current recursion
// level; once it exceeds kMaxDepth the attempt is abandoned and reported as
// "no match".
template <class CHAR>
static bool MatchPatternT(const CHAR* eval, const CHAR* pattern, int depth) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Advance both pointers past their common prefix.
  EatSameChars(&pattern, &eval);

  // String exhausted: it is a match only if whatever is left of the pattern
  // consists solely of wildcards.
  if (*eval == 0) {
    EatWildcard(&pattern);
    return *pattern == 0;
  }

  // String has characters left but the pattern does not.
  if (*pattern == 0)
    return false;

  // '?': retry the rest of the pattern against the current position and
  // against the position one character further on.
  if (pattern[0] == '?' &&
      (MatchPatternT(eval, pattern + 1, depth + 1) ||
       MatchPatternT(eval + 1, pattern + 1, depth + 1))) {
    return true;
  }

  if (pattern[0] != '*')
    return false;

  // '*': retry the rest of the pattern against every remaining suffix.
  for (; *eval; ++eval) {
    if (MatchPatternT(eval, pattern + 1, depth + 1))
      return true;
  }

  // The string ran out; succeed only if the pattern holds nothing but
  // wildcards from here on.
  EatWildcard(&pattern);
  return *pattern == 0;
}
output) { 1705 return StringToNumber<HexString16ToIntTraits>(input, output); 1706 } 1707 1708 namespace { 1709 1710 template<class CHAR> 1711 bool HexDigitToIntT(const CHAR digit, uint8* val) { 1712 if (digit >= '0' && digit <= '9') 1713 *val = digit - '0'; 1714 else if (digit >= 'a' && digit <= 'f') 1715 *val = 10 + digit - 'a'; 1716 else if (digit >= 'A' && digit <= 'F') 1717 *val = 10 + digit - 'A'; 1718 else 1719 return false; 1720 return true; 1721 } 1722 1723 template<typename STR> 1724 bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) { 1725 DCHECK(output->size() == 0); 1726 size_t count = input.size(); 1727 if (count == 0 || (count % 2) != 0) 1728 return false; 1729 for (uintptr_t i = 0; i < count / 2; ++i) { 1730 uint8 msb = 0; // most significant 4 bits 1731 uint8 lsb = 0; // least significant 4 bits 1732 if (!HexDigitToIntT(input[i * 2], &msb) || 1733 !HexDigitToIntT(input[i * 2 + 1], &lsb)) 1734 return false; 1735 output->push_back((msb << 4) | lsb); 1736 } 1737 return true; 1738 } 1739 1740 } // namespace 1741 1742 bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) { 1743 return HexStringToBytesT(input, output); 1744 } 1745 1746 bool HexStringToBytes(const string16& input, std::vector<uint8>* output) { 1747 return HexStringToBytesT(input, output); 1748 } 1749 1750 int StringToInt(const std::string& value) { 1751 int result; 1752 StringToInt(value, &result); 1753 return result; 1754 } 1755 1756 int StringToInt(const string16& value) { 1757 int result; 1758 StringToInt(value, &result); 1759 return result; 1760 } 1761 1762 int64 StringToInt64(const std::string& value) { 1763 int64 result; 1764 StringToInt64(value, &result); 1765 return result; 1766 } 1767 1768 int64 StringToInt64(const string16& value) { 1769 int64 result; 1770 StringToInt64(value, &result); 1771 return result; 1772 } 1773 1774 int HexStringToInt(const std::string& value) { 1775 int result; 1776 HexStringToInt(value, &result); 1777 return 
namespace {

// strlcpy-style copy shared by base::strlcpy and base::wcslcpy. Copies |src|
// into |dst|, which has room for |dst_size| characters including the
// terminator. When |src| does not fit the copy is truncated, and |dst| is
// still NUL-terminated provided |dst_size| is non-zero. Returns the length of
// |src| in characters, not counting its terminator.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR current = src[copied];
    dst[copied] = current;
    if (current == 0)  // The terminator was reached and copied; all done.
      return copied;
    ++copied;
  }

  // |dst| filled up before the terminator was seen. Overwrite the last
  // character copied so that the result is NUL-terminated.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Measure the remainder of |src| so that its full length, in characters,
  // can be returned.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace