1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <math.h> 6 #include <stdarg.h> 7 8 #include <limits> 9 #include <sstream> 10 11 #include "base/basictypes.h" 12 #include "base/string_util.h" 13 #include "testing/gtest/include/gtest/gtest.h" 14 15 namespace base { 16 17 namespace { 18 19 // Given a null-terminated string of wchar_t with each wchar_t representing 20 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. 21 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) 22 // should be represented as a surrogate pair (two UTF-16 units) 23 // *even* where wchar_t is 32-bit (Linux and Mac). 24 // 25 // This is to help write tests for functions with string16 params until 26 // the C++ 0x UTF-16 literal is well-supported by compilers. 27 string16 BuildString16(const wchar_t* s) { 28 #if defined(WCHAR_T_IS_UTF16) 29 return string16(s); 30 #elif defined(WCHAR_T_IS_UTF32) 31 string16 u16; 32 while (*s != 0) { 33 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu); 34 u16.push_back(*s++); 35 } 36 return u16; 37 #endif 38 } 39 40 } // namespace 41 42 static const struct trim_case { 43 const wchar_t* input; 44 const TrimPositions positions; 45 const wchar_t* output; 46 const TrimPositions return_value; 47 } trim_cases[] = { 48 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 49 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 50 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 51 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 52 {L"", TRIM_ALL, L"", TRIM_NONE}, 53 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 54 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 55 {L" ", TRIM_ALL, L"", TRIM_ALL}, 56 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 57 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 58 }; 59 60 static const struct trim_case_ascii { 61 const char* input; 62 const TrimPositions positions; 63 const char* output; 64 const TrimPositions return_value; 65 } trim_cases_ascii[] = { 66 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 67 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 68 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 69 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 70 {"", TRIM_ALL, "", TRIM_NONE}, 71 {" ", TRIM_LEADING, "", TRIM_LEADING}, 72 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 73 {" ", TRIM_ALL, "", TRIM_ALL}, 74 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 75 }; 76 77 TEST(StringUtilTest, TrimWhitespace) { 78 std::wstring output; // Allow contents to carry over to next testcase 79 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 80 const trim_case& value = trim_cases[i]; 81 EXPECT_EQ(value.return_value, 82 TrimWhitespace(value.input, value.positions, &output)); 83 EXPECT_EQ(value.output, output); 84 } 85 86 // Test that TrimWhitespace() can take the same string for input and output 87 output = L" This is a test \r\n"; 88 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 89 EXPECT_EQ(L"This is a test", output); 90 91 // Once more, but with a string of whitespace 92 output = L" \r\n"; 93 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 94 EXPECT_EQ(L"", output); 95 96 std::string output_ascii; 97 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 98 const trim_case_ascii& value = trim_cases_ascii[i]; 99 EXPECT_EQ(value.return_value, 100 TrimWhitespace(value.input, value.positions, &output_ascii)); 101 EXPECT_EQ(value.output, output_ascii); 102 } 103 } 104 105 static const struct collapse_case { 106 const wchar_t* input; 107 const bool trim; 108 const wchar_t* output; 109 } collapse_cases[] = { 110 {L" Google Video ", false, L"Google Video"}, 111 {L"Google Video", false, L"Google Video"}, 112 {L"", false, L""}, 113 {L" ", false, L""}, 114 {L"\t\rTest String\n", false, L"Test String"}, 115 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 116 {L" Test \n \t String ", false, L"Test String"}, 117 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 118 {L" Test String", false, L"Test String"}, 119 {L"Test String ", false, L"Test String"}, 120 {L"Test String", false, L"Test String"}, 121 {L"", true, L""}, 122 {L"\n", true, L""}, 123 {L" \r ", true, L""}, 124 {L"\nFoo", true, L"Foo"}, 125 {L"\r Foo ", true, L"Foo"}, 126 {L" Foo bar ", true, L"Foo bar"}, 127 {L" \tFoo bar \n", true, L"Foo bar"}, 128 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 129 }; 130 131 TEST(StringUtilTest, CollapseWhitespace) { 132 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 133 const collapse_case& value = collapse_cases[i]; 134 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); 135 } 136 } 137 138 static const struct collapse_case_ascii { 139 const char* input; 140 const bool trim; 141 const char* output; 142 } collapse_cases_ascii[] = { 143 {" Google Video ", false, "Google Video"}, 144 {"Google Video", false, "Google Video"}, 145 {"", false, ""}, 146 {" ", false, ""}, 147 {"\t\rTest String\n", false, "Test String"}, 148 {" Test \n \t String ", false, "Test String"}, 149 {" Test String", false, "Test String"}, 150 {"Test String ", false, "Test String"}, 151 {"Test String", false, "Test String"}, 152 {"", true, ""}, 153 {"\n", true, ""}, 154 {" \r ", true, ""}, 155 {"\nFoo", true, "Foo"}, 156 {"\r Foo ", true, "Foo"}, 157 {" Foo bar ", true, "Foo bar"}, 158 {" \tFoo bar \n", true, "Foo bar"}, 159 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 160 }; 161 162 TEST(StringUtilTest, CollapseWhitespaceASCII) { 163 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 164 const collapse_case_ascii& value = collapse_cases_ascii[i]; 165 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 166 } 167 } 168 169 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) { 170 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("")); 171 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" ")); 172 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t")); 173 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n ")); 174 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a")); 175 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n ")); 176 } 177 178 TEST(StringUtilTest, ContainsOnlyWhitespace) { 179 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(""))); 180 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" "))); 181 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t"))); 182 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n "))); 183 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a"))); 184 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n "))); 185 } 186 187 TEST(StringUtilTest, IsStringUTF8) { 188 EXPECT_TRUE(IsStringUTF8("abc")); 189 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 190 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 191 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 192 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 193 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 194 195 // surrogate code points 196 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 197 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 198 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 199 200 // overlong sequences 201 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 202 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 203 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 204 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 205 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 206 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 207 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 208 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 209 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 210 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 211 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 212 213 // Beyond U+10FFFF (the upper limit of Unicode codespace) 214 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 215 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 216 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 217 218 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 219 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 220 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 221 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 222 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 223 224 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 225 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 226 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 227 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 228 229 // This should also be false, but currently we pass them through. 230 // Disable them for now. 231 #if 0 232 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 233 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 234 #endif 235 236 // Strings in legacy encodings. We can certainly make up strings 237 // in a legacy encoding that are valid in UTF-8, but in real data, 238 // most of them are invalid as UTF-8. 239 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 240 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 241 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 242 // "abc" with U+201[CD] in windows-125[0-8] 243 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 244 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 245 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 246 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 247 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 248 } 249 250 TEST(StringUtilTest, ConvertASCII) { 251 static const char* char_cases[] = { 252 "Google Video", 253 "Hello, world\n", 254 "0123ABCDwxyz \a\b\t\r\n!+,.~" 255 }; 256 257 static const wchar_t* const wchar_cases[] = { 258 L"Google Video", 259 L"Hello, world\n", 260 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 261 }; 262 263 for (size_t i = 0; i < arraysize(char_cases); ++i) { 264 EXPECT_TRUE(IsStringASCII(char_cases[i])); 265 std::wstring wide = ASCIIToWide(char_cases[i]); 266 EXPECT_EQ(wchar_cases[i], wide); 267 268 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); 269 std::string ascii = WideToASCII(wchar_cases[i]); 270 EXPECT_EQ(char_cases[i], ascii); 271 } 272 273 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 274 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); 275 276 // Convert empty strings. 277 std::wstring wempty; 278 std::string empty; 279 EXPECT_EQ(empty, WideToASCII(wempty)); 280 EXPECT_EQ(wempty, ASCIIToWide(empty)); 281 282 // Convert strings with an embedded NUL character. 283 const char chars_with_nul[] = "test\0string"; 284 const int length_with_nul = arraysize(chars_with_nul) - 1; 285 std::string string_with_nul(chars_with_nul, length_with_nul); 286 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); 287 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), 288 wide_with_nul.length()); 289 std::string narrow_with_nul = WideToASCII(wide_with_nul); 290 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 291 narrow_with_nul.length()); 292 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 293 } 294 295 TEST(StringUtilTest, ToUpperASCII) { 296 EXPECT_EQ('C', ToUpperASCII('C')); 297 EXPECT_EQ('C', ToUpperASCII('c')); 298 EXPECT_EQ('2', ToUpperASCII('2')); 299 300 EXPECT_EQ(L'C', ToUpperASCII(L'C')); 301 EXPECT_EQ(L'C', ToUpperASCII(L'c')); 302 EXPECT_EQ(L'2', ToUpperASCII(L'2')); 303 304 std::string in_place_a("Cc2"); 305 StringToUpperASCII(&in_place_a); 306 EXPECT_EQ("CC2", in_place_a); 307 308 std::wstring in_place_w(L"Cc2"); 309 StringToUpperASCII(&in_place_w); 310 EXPECT_EQ(L"CC2", in_place_w); 311 312 std::string original_a("Cc2"); 313 std::string upper_a = StringToUpperASCII(original_a); 314 EXPECT_EQ("CC2", upper_a); 315 316 std::wstring original_w(L"Cc2"); 317 std::wstring upper_w = StringToUpperASCII(original_w); 318 EXPECT_EQ(L"CC2", upper_w); 319 } 320 321 static const struct { 322 const wchar_t* src_w; 323 const char* src_a; 324 const char* dst; 325 } lowercase_cases[] = { 326 {L"FoO", "FoO", "foo"}, 327 {L"foo", "foo", "foo"}, 328 {L"FOO", "FOO", "foo"}, 329 }; 330 331 TEST(StringUtilTest, LowerCaseEqualsASCII) { 332 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { 333 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, 334 lowercase_cases[i].dst)); 335 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 336 lowercase_cases[i].dst)); 337 } 338 } 339 340 TEST(StringUtilTest, GetByteDisplayUnits) { 341 static const struct { 342 int64 bytes; 343 DataUnits expected; 344 } cases[] = { 345 {0, DATA_UNITS_BYTE}, 346 {512, DATA_UNITS_BYTE}, 347 {10*1024, DATA_UNITS_KIBIBYTE}, 348 {10*1024*1024, DATA_UNITS_MEBIBYTE}, 349 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE}, 350 {~(1LL<<63), DATA_UNITS_GIBIBYTE}, 351 #ifdef NDEBUG 352 {-1, DATA_UNITS_BYTE}, 353 #endif 354 }; 355 356 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 357 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); 358 } 359 360 TEST(StringUtilTest, FormatBytes) { 361 static const struct { 362 int64 bytes; 363 DataUnits units; 364 const wchar_t* expected; 365 const wchar_t* expected_with_units; 366 } cases[] = { 367 {0, DATA_UNITS_BYTE, L"0", L"0 B"}, 368 {512, DATA_UNITS_BYTE, L"512", L"512 B"}, 369 {512, DATA_UNITS_KIBIBYTE, L"0.5", L"0.5 kB"}, 370 {1024*1024, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"}, 371 {1024*1024, DATA_UNITS_MEBIBYTE, L"1", L"1 MB"}, 372 {1024*1024*1024, DATA_UNITS_GIBIBYTE, L"1", L"1 GB"}, 373 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10", L"10 GB"}, 374 {~(1LL<<63), DATA_UNITS_GIBIBYTE, L"8589934592", L"8589934592 GB"}, 375 // Make sure the first digit of the fractional part works. 376 {1024*1024 + 103, DATA_UNITS_KIBIBYTE, L"1024.1", L"1024.1 kB"}, 377 {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, L"1.2", L"1.2 MB"}, 378 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE, 379 L"1.9", L"1.9 GB"}, 380 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10", L"10 GB"}, 381 #ifdef NDEBUG 382 {-1, DATA_UNITS_BYTE, L"", L""}, 383 #endif 384 }; 385 386 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 387 EXPECT_EQ(cases[i].expected, 388 FormatBytes(cases[i].bytes, cases[i].units, false)); 389 EXPECT_EQ(cases[i].expected_with_units, 390 FormatBytes(cases[i].bytes, cases[i].units, true)); 391 } 392 } 393 394 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 395 static const struct { 396 const char* str; 397 string16::size_type start_offset; 398 const char* find_this; 399 const char* replace_with; 400 const char* expected; 401 } cases[] = { 402 {"aaa", 0, "a", "b", "bbb"}, 403 {"abb", 0, "ab", "a", "ab"}, 404 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 405 {"Not found", 0, "x", "0", "Not found"}, 406 {"Not found again", 5, "x", "0", "Not found again"}, 407 {" Making it much longer ", 0, " ", "Four score and seven years ago", 408 "Four score and seven years agoMakingFour score and seven years agoit" 409 "Four score and seven years agomuchFour score and seven years agolonger" 410 "Four score and seven years ago"}, 411 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 412 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 413 {"abababab", 2, "ab", "c", "abccc"}, 414 }; 415 416 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 417 string16 str = ASCIIToUTF16(cases[i].str); 418 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 419 ASCIIToUTF16(cases[i].find_this), 420 ASCIIToUTF16(cases[i].replace_with)); 421 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 422 } 423 } 424 425 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 426 static const struct { 427 const char* str; 428 string16::size_type start_offset; 429 const char* find_this; 430 const char* replace_with; 431 const char* expected; 432 } cases[] = { 433 {"aaa", 0, "a", "b", "baa"}, 434 {"abb", 0, "ab", "a", "ab"}, 435 {"Removing some substrings inging", 0, "ing", "", 436 "Remov some substrings inging"}, 437 {"Not found", 0, "x", "0", "Not found"}, 438 {"Not found again", 5, "x", "0", "Not found again"}, 439 {" Making it much longer ", 0, " ", "Four score and seven years ago", 440 "Four score and seven years agoMaking it much longer "}, 441 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 442 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 443 {"abababab", 2, "ab", "c", "abcabab"}, 444 }; 445 446 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 447 string16 str = ASCIIToUTF16(cases[i].str); 448 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 449 ASCIIToUTF16(cases[i].find_this), 450 ASCIIToUTF16(cases[i].replace_with)); 451 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 452 } 453 } 454 455 namespace { 456 457 template <typename INT> 458 struct IntToStringTest { 459 INT num; 460 const char* sexpected; 461 const char* uexpected; 462 }; 463 464 } 465 466 TEST(StringUtilTest, IntToString) { 467 static const IntToStringTest<int> int_tests[] = { 468 { 0, "0", "0" }, 469 { -1, "-1", "4294967295" }, 470 { std::numeric_limits<int>::max(), "2147483647", "2147483647" }, 471 { std::numeric_limits<int>::min(), "-2147483648", "2147483648" }, 472 }; 473 static const IntToStringTest<int64> int64_tests[] = { 474 { 0, "0", "0" }, 475 { -1, "-1", "18446744073709551615" }, 476 { std::numeric_limits<int64>::max(), 477 "9223372036854775807", 478 "9223372036854775807", }, 479 { std::numeric_limits<int64>::min(), 480 "-9223372036854775808", 481 "9223372036854775808" }, 482 }; 483 484 for (size_t i = 0; i < arraysize(int_tests); ++i) { 485 const IntToStringTest<int>* test = &int_tests[i]; 486 EXPECT_EQ(IntToString(test->num), test->sexpected); 487 EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected)); 488 EXPECT_EQ(UintToString(test->num), test->uexpected); 489 EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected)); 490 } 491 for (size_t i = 0; i < arraysize(int64_tests); ++i) { 492 const IntToStringTest<int64>* test = &int64_tests[i]; 493 EXPECT_EQ(Int64ToString(test->num), test->sexpected); 494 EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected)); 495 EXPECT_EQ(Uint64ToString(test->num), test->uexpected); 496 EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected)); 497 } 498 } 499 500 TEST(StringUtilTest, Uint64ToString) { 501 static const struct { 502 uint64 input; 503 std::string output; 504 } cases[] = { 505 {0, "0"}, 506 {42, "42"}, 507 {INT_MAX, "2147483647"}, 508 {kuint64max, "18446744073709551615"}, 509 }; 510 511 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 512 EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input)); 513 } 514 515 TEST(StringUtilTest, StringToInt) { 516 static const struct { 517 std::string input; 518 int output; 519 bool success; 520 } cases[] = { 521 {"0", 0, true}, 522 {"42", 42, true}, 523 {"-2147483648", INT_MIN, true}, 524 {"2147483647", INT_MAX, true}, 525 {"", 0, false}, 526 {" 42", 42, false}, 527 {"42 ", 42, false}, 528 {"\t\n\v\f\r 42", 42, false}, 529 {"blah42", 0, false}, 530 {"42blah", 42, false}, 531 {"blah42blah", 0, false}, 532 {"-273.15", -273, false}, 533 {"+98.6", 98, false}, 534 {"--123", 0, false}, 535 {"++123", 0, false}, 536 {"-+123", 0, false}, 537 {"+-123", 0, false}, 538 {"-", 0, false}, 539 {"-2147483649", INT_MIN, false}, 540 {"-99999999999", INT_MIN, false}, 541 {"2147483648", INT_MAX, false}, 542 {"99999999999", INT_MAX, false}, 543 }; 544 545 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 546 EXPECT_EQ(cases[i].output, StringToInt(cases[i].input)); 547 int output; 548 EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output)); 549 EXPECT_EQ(cases[i].output, output); 550 551 std::wstring wide_input = ASCIIToWide(cases[i].input); 552 EXPECT_EQ(cases[i].output, StringToInt(WideToUTF16Hack(wide_input))); 553 EXPECT_EQ(cases[i].success, StringToInt(WideToUTF16Hack(wide_input), 554 &output)); 555 EXPECT_EQ(cases[i].output, output); 556 } 557 558 // One additional test to verify that conversion of numbers in strings with 559 // embedded NUL characters. The NUL and extra data after it should be 560 // interpreted as junk after the number. 561 const char input[] = "6\06"; 562 std::string input_string(input, arraysize(input) - 1); 563 int output; 564 EXPECT_FALSE(StringToInt(input_string, &output)); 565 EXPECT_EQ(6, output); 566 567 std::wstring wide_input = ASCIIToWide(input_string); 568 EXPECT_FALSE(StringToInt(WideToUTF16Hack(wide_input), &output)); 569 EXPECT_EQ(6, output); 570 } 571 572 TEST(StringUtilTest, StringToInt64) { 573 static const struct { 574 std::string input; 575 int64 output; 576 bool success; 577 } cases[] = { 578 {"0", 0, true}, 579 {"42", 42, true}, 580 {"-2147483648", INT_MIN, true}, 581 {"2147483647", INT_MAX, true}, 582 {"-2147483649", GG_INT64_C(-2147483649), true}, 583 {"-99999999999", GG_INT64_C(-99999999999), true}, 584 {"2147483648", GG_INT64_C(2147483648), true}, 585 {"99999999999", GG_INT64_C(99999999999), true}, 586 {"9223372036854775807", kint64max, true}, 587 {"-9223372036854775808", kint64min, true}, 588 {"09", 9, true}, 589 {"-09", -9, true}, 590 {"", 0, false}, 591 {" 42", 42, false}, 592 {"42 ", 42, false}, 593 {"\t\n\v\f\r 42", 42, false}, 594 {"blah42", 0, false}, 595 {"42blah", 42, false}, 596 {"blah42blah", 0, false}, 597 {"-273.15", -273, false}, 598 {"+98.6", 98, false}, 599 {"--123", 0, false}, 600 {"++123", 0, false}, 601 {"-+123", 0, false}, 602 {"+-123", 0, false}, 603 {"-", 0, false}, 604 {"-9223372036854775809", kint64min, false}, 605 {"-99999999999999999999", kint64min, false}, 606 {"9223372036854775808", kint64max, false}, 607 {"99999999999999999999", kint64max, false}, 608 }; 609 610 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 611 EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input)); 612 int64 output; 613 EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output)); 614 EXPECT_EQ(cases[i].output, output); 615 616 std::wstring wide_input = ASCIIToWide(cases[i].input); 617 EXPECT_EQ(cases[i].output, StringToInt64(WideToUTF16Hack(wide_input))); 618 EXPECT_EQ(cases[i].success, StringToInt64(WideToUTF16Hack(wide_input), 619 &output)); 620 EXPECT_EQ(cases[i].output, output); 621 } 622 623 // One additional test to verify that conversion of numbers in strings with 624 // embedded NUL characters. The NUL and extra data after it should be 625 // interpreted as junk after the number. 626 const char input[] = "6\06"; 627 std::string input_string(input, arraysize(input) - 1); 628 int64 output; 629 EXPECT_FALSE(StringToInt64(input_string, &output)); 630 EXPECT_EQ(6, output); 631 632 std::wstring wide_input = ASCIIToWide(input_string); 633 EXPECT_FALSE(StringToInt64(WideToUTF16Hack(wide_input), &output)); 634 EXPECT_EQ(6, output); 635 } 636 637 TEST(StringUtilTest, HexStringToInt) { 638 static const struct { 639 std::string input; 640 int output; 641 bool success; 642 } cases[] = { 643 {"0", 0, true}, 644 {"42", 66, true}, 645 {"-42", -66, true}, 646 {"+42", 66, true}, 647 {"7fffffff", INT_MAX, true}, 648 {"80000000", INT_MIN, true}, 649 {"ffffffff", -1, true}, 650 {"DeadBeef", 0xdeadbeef, true}, 651 {"0x42", 66, true}, 652 {"-0x42", -66, true}, 653 {"+0x42", 66, true}, 654 {"0x7fffffff", INT_MAX, true}, 655 {"0x80000000", INT_MIN, true}, 656 {"0xffffffff", -1, true}, 657 {"0XDeadBeef", 0xdeadbeef, true}, 658 {"0x0f", 15, true}, 659 {"0f", 15, true}, 660 {" 45", 0x45, false}, 661 {"\t\n\v\f\r 0x45", 0x45, false}, 662 {" 45", 0x45, false}, 663 {"45 ", 0x45, false}, 664 {"efgh", 0xef, false}, 665 {"0xefgh", 0xef, false}, 666 {"hgfe", 0, false}, 667 {"100000000", -1, false}, // don't care about |output|, just |success| 668 {"-", 0, false}, 669 {"", 0, false}, 670 }; 671 672 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 673 EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input)); 674 int output; 675 EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output)); 676 EXPECT_EQ(cases[i].output, output); 677 678 std::wstring wide_input = ASCIIToWide(cases[i].input); 679 EXPECT_EQ(cases[i].output, HexStringToInt(WideToUTF16Hack(wide_input))); 680 EXPECT_EQ(cases[i].success, HexStringToInt(WideToUTF16Hack(wide_input), 681 &output)); 682 EXPECT_EQ(cases[i].output, output); 683 } 684 // One additional test to verify that conversion of numbers in strings with 685 // embedded NUL characters. The NUL and extra data after it should be 686 // interpreted as junk after the number. 687 const char input[] = "0xc0ffee\09"; 688 std::string input_string(input, arraysize(input) - 1); 689 int output; 690 EXPECT_FALSE(HexStringToInt(input_string, &output)); 691 EXPECT_EQ(0xc0ffee, output); 692 693 std::wstring wide_input = ASCIIToWide(input_string); 694 EXPECT_FALSE(HexStringToInt(WideToUTF16Hack(wide_input), &output)); 695 EXPECT_EQ(0xc0ffee, output); 696 } 697 698 TEST(StringUtilTest, HexStringToBytes) { 699 static const struct { 700 const std::string input; 701 const char* output; 702 size_t output_len; 703 bool success; 704 } cases[] = { 705 {"0", "", 0, false}, // odd number of characters fails 706 {"00", "\0", 1, true}, 707 {"42", "\x42", 1, true}, 708 {"-42", "", 0, false}, // any non-hex value fails 709 {"+42", "", 0, false}, 710 {"7fffffff", "\x7f\xff\xff\xff", 4, true}, 711 {"80000000", "\x80\0\0\0", 4, true}, 712 {"deadbeef", "\xde\xad\xbe\xef", 4, true}, 713 {"DeadBeef", "\xde\xad\xbe\xef", 4, true}, 714 {"0x42", "", 0, false}, // leading 0x fails (x is not hex) 715 {"0f", "\xf", 1, true}, 716 {"45 ", "\x45", 1, false}, 717 {"efgh", "\xef", 1, false}, 718 {"", "", 0, false}, 719 {"0123456789ABCDEF", "\x01\x23\x45\x67\x89\xAB\xCD\xEF", 8, true}, 720 {"0123456789ABCDEF012345", 721 "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true}, 722 }; 723 724 725 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 726 std::vector<uint8> output; 727 std::vector<uint8> compare; 728 EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) << 729 i << ": " << cases[i].input; 730 for (size_t j = 0; j < cases[i].output_len; ++j) 731 compare.push_back(static_cast<uint8>(cases[i].output[j])); 732 ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input; 733 EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) << 734 i << ": " << cases[i].input; 735 736 output.clear(); 737 compare.clear(); 738 739 std::wstring wide_input = ASCIIToWide(cases[i].input); 740 EXPECT_EQ(cases[i].success, 741 HexStringToBytes(WideToUTF16Hack(wide_input), &output)) << 742 i << ": " << cases[i].input; 743 for (size_t j = 0; j < cases[i].output_len; ++j) 744 compare.push_back(static_cast<uint8>(cases[i].output[j])); 745 ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input; 746 EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) << 747 i << ": " << cases[i].input; 748 } 749 } 750 751 TEST(StringUtilTest, StringToDouble) { 752 static const struct { 753 std::string input; 754 double output; 755 bool success; 756 } cases[] = { 757 {"0", 0.0, true}, 758 {"42", 42.0, true}, 759 {"-42", -42.0, true}, 760 {"123.45", 123.45, true}, 761 {"-123.45", -123.45, true}, 762 {"+123.45", 123.45, true}, 763 {"2.99792458e8", 299792458.0, true}, 764 {"149597870.691E+3", 149597870691.0, true}, 765 {"6.", 6.0, true}, 766 {"9e99999999999999999999", HUGE_VAL, false}, 767 {"-9e99999999999999999999", -HUGE_VAL, false}, 768 {"1e-2", 0.01, true}, 769 {" 1e-2", 0.01, false}, 770 {"1e-2 ", 0.01, false}, 771 {"-1E-7", -0.0000001, true}, 772 {"01e02", 100, true}, 773 {"2.3e15", 2.3e15, true}, 774 {"\t\n\v\f\r -123.45e2", -12345.0, false}, 775 {"+123 e4", 123.0, false}, 776 {"123e ", 123.0, false}, 777 {"123e", 123.0, false}, 778 {" 2.99", 2.99, false}, 779 {"1e3.4", 1000.0, false}, 780 {"nothing", 0.0, false}, 781 {"-", 0.0, false}, 782 {"+", 0.0, false}, 783 {"", 0.0, false}, 784 }; 785 786 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 787 EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input)); 788 double output; 789 EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output)); 790 EXPECT_DOUBLE_EQ(cases[i].output, output); 791 792 std::wstring wide_input = ASCIIToWide(cases[i].input); 793 EXPECT_DOUBLE_EQ(cases[i].output, 794 StringToDouble(WideToUTF16Hack(wide_input))); 795 EXPECT_EQ(cases[i].success, StringToDouble(WideToUTF16Hack(wide_input), 796 &output)); 797 EXPECT_DOUBLE_EQ(cases[i].output, output); 798 } 799 800 // One additional test to verify that conversion of numbers in strings with 801 // embedded NUL characters. The NUL and extra data after it should be 802 // interpreted as junk after the number. 803 const char input[] = "3.14\0159"; 804 std::string input_string(input, arraysize(input) - 1); 805 double output; 806 EXPECT_FALSE(StringToDouble(input_string, &output)); 807 EXPECT_DOUBLE_EQ(3.14, output); 808 809 std::wstring wide_input = ASCIIToWide(input_string); 810 EXPECT_FALSE(StringToDouble(WideToUTF16Hack(wide_input), &output)); 811 EXPECT_DOUBLE_EQ(3.14, output); 812 } 813 814 // This checks where we can use the assignment operator for a va_list. We need 815 // a way to do this since Visual C doesn't support va_copy, but assignment on 816 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this 817 // capability. 818 static void VariableArgsFunc(const char* format, ...) { 819 va_list org; 820 va_start(org, format); 821 822 va_list dup; 823 GG_VA_COPY(dup, org); 824 int i1 = va_arg(org, int); 825 int j1 = va_arg(org, int); 826 char* s1 = va_arg(org, char*); 827 double d1 = va_arg(org, double); 828 va_end(org); 829 830 int i2 = va_arg(dup, int); 831 int j2 = va_arg(dup, int); 832 char* s2 = va_arg(dup, char*); 833 double d2 = va_arg(dup, double); 834 835 EXPECT_EQ(i1, i2); 836 EXPECT_EQ(j1, j2); 837 EXPECT_STREQ(s1, s2); 838 EXPECT_EQ(d1, d2); 839 840 va_end(dup); 841 } 842 843 TEST(StringUtilTest, VAList) { 844 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); 845 } 846 847 TEST(StringUtilTest, StringPrintfEmpty) { 848 EXPECT_EQ("", StringPrintf("%s", "")); 849 } 850 851 TEST(StringUtilTest, StringPrintfMisc) { 852 EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w')); 853 EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w')); 854 } 855 856 TEST(StringUtilTest, StringAppendfEmptyString) { 857 std::string value("Hello"); 858 StringAppendF(&value, "%s", ""); 859 EXPECT_EQ("Hello", value); 860 861 std::wstring valuew(L"Hello"); 862 StringAppendF(&valuew, L"%ls", L""); 863 EXPECT_EQ(L"Hello", valuew); 864 } 865 866 TEST(StringUtilTest, StringAppendfString) { 867 std::string value("Hello"); 868 StringAppendF(&value, " %s", "World"); 869 EXPECT_EQ("Hello World", value); 870 871 std::wstring valuew(L"Hello"); 872 StringAppendF(&valuew, L" %ls", L"World"); 873 EXPECT_EQ(L"Hello World", valuew); 874 } 875 876 TEST(StringUtilTest, StringAppendfInt) { 877 std::string value("Hello"); 878 StringAppendF(&value, " %d", 123); 879 EXPECT_EQ("Hello 123", value); 880 881 std::wstring valuew(L"Hello"); 882 StringAppendF(&valuew, L" %d", 123); 883 EXPECT_EQ(L"Hello 123", valuew); 884 } 885 886 // Make sure that lengths exactly around the initial buffer size are handled 887 // correctly. 888 TEST(StringUtilTest, StringPrintfBounds) { 889 const int kSrcLen = 1026; 890 char src[kSrcLen]; 891 for (size_t i = 0; i < arraysize(src); i++) 892 src[i] = 'A'; 893 894 wchar_t srcw[kSrcLen]; 895 for (size_t i = 0; i < arraysize(srcw); i++) 896 srcw[i] = 'A'; 897 898 for (int i = 1; i < 3; i++) { 899 src[kSrcLen - i] = 0; 900 std::string out; 901 SStringPrintf(&out, "%s", src); 902 EXPECT_STREQ(src, out.c_str()); 903 904 srcw[kSrcLen - i] = 0; 905 std::wstring outw; 906 SStringPrintf(&outw, L"%ls", srcw); 907 EXPECT_STREQ(srcw, outw.c_str()); 908 } 909 } 910 911 // Test very large sprintfs that will cause the buffer to grow. 912 TEST(StringUtilTest, Grow) { 913 char src[1026]; 914 for (size_t i = 0; i < arraysize(src); i++) 915 src[i] = 'A'; 916 src[1025] = 0; 917 918 const char* fmt = "%sB%sB%sB%sB%sB%sB%s"; 919 920 std::string out; 921 SStringPrintf(&out, fmt, src, src, src, src, src, src, src); 922 923 const int kRefSize = 320000; 924 char* ref = new char[kRefSize]; 925 #if defined(OS_WIN) 926 sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src); 927 #elif defined(OS_POSIX) 928 snprintf(ref, kRefSize, fmt, src, src, src, src, src, src, src); 929 #endif 930 931 EXPECT_STREQ(ref, out.c_str()); 932 delete[] ref; 933 } 934 935 // A helper for the StringAppendV test that follows. 936 // Just forwards its args to StringAppendV. 937 static void StringAppendVTestHelper(std::string* out, 938 const char* format, 939 ...) PRINTF_FORMAT(2, 3); 940 941 static void StringAppendVTestHelper(std::string* out, const char* format, ...) { 942 va_list ap; 943 va_start(ap, format); 944 StringAppendV(out, format, ap); 945 va_end(ap); 946 } 947 948 TEST(StringUtilTest, StringAppendV) { 949 std::string out; 950 StringAppendVTestHelper(&out, "%d foo %s", 1, "bar"); 951 EXPECT_EQ("1 foo bar", out); 952 } 953 954 // Test the boundary condition for the size of the string_util's 955 // internal buffer. 956 TEST(StringUtilTest, GrowBoundary) { 957 const int string_util_buf_len = 1024; 958 // Our buffer should be one larger than the size of StringAppendVT's stack 959 // buffer. 960 const int buf_len = string_util_buf_len + 1; 961 char src[buf_len + 1]; // Need extra one for NULL-terminator. 962 for (int i = 0; i < buf_len; ++i) 963 src[i] = 'a'; 964 src[buf_len] = 0; 965 966 std::string out; 967 SStringPrintf(&out, "%s", src); 968 969 EXPECT_STREQ(src, out.c_str()); 970 } 971 972 // TODO(evanm): what's the proper cross-platform test here? 973 #if defined(OS_WIN) 974 // sprintf in Visual Studio fails when given U+FFFF. This tests that the 975 // failure case is gracefuly handled. 976 TEST(StringUtilTest, Invalid) { 977 wchar_t invalid[2]; 978 invalid[0] = 0xffff; 979 invalid[1] = 0; 980 981 std::wstring out; 982 SStringPrintf(&out, L"%ls", invalid); 983 EXPECT_STREQ(L"", out.c_str()); 984 } 985 #endif 986 987 // Test for SplitString 988 TEST(StringUtilTest, SplitString) { 989 std::vector<std::wstring> r; 990 991 SplitString(L"a,b,c", L',', &r); 992 ASSERT_EQ(3U, r.size()); 993 EXPECT_EQ(r[0], L"a"); 994 EXPECT_EQ(r[1], L"b"); 995 EXPECT_EQ(r[2], L"c"); 996 r.clear(); 997 998 SplitString(L"a, b, c", L',', &r); 999 ASSERT_EQ(3U, r.size()); 1000 EXPECT_EQ(r[0], L"a"); 1001 EXPECT_EQ(r[1], L"b"); 1002 EXPECT_EQ(r[2], L"c"); 1003 r.clear(); 1004 1005 SplitString(L"a,,c", L',', &r); 1006 ASSERT_EQ(3U, r.size()); 1007 EXPECT_EQ(r[0], L"a"); 1008 EXPECT_EQ(r[1], L""); 1009 EXPECT_EQ(r[2], L"c"); 1010 r.clear(); 1011 1012 SplitString(L"", L'*', &r); 1013 ASSERT_EQ(1U, r.size()); 1014 EXPECT_EQ(r[0], L""); 1015 r.clear(); 1016 1017 SplitString(L"foo", L'*', &r); 1018 ASSERT_EQ(1U, r.size()); 1019 EXPECT_EQ(r[0], L"foo"); 1020 r.clear(); 1021 1022 SplitString(L"foo ,", L',', &r); 1023 ASSERT_EQ(2U, r.size()); 1024 EXPECT_EQ(r[0], L"foo"); 1025 EXPECT_EQ(r[1], L""); 1026 r.clear(); 1027 1028 SplitString(L",", L',', &r); 1029 ASSERT_EQ(2U, r.size()); 1030 EXPECT_EQ(r[0], L""); 1031 EXPECT_EQ(r[1], L""); 1032 r.clear(); 1033 1034 SplitString(L"\t\ta\t", L'\t', &r); 1035 ASSERT_EQ(4U, r.size()); 1036 EXPECT_EQ(r[0], L""); 1037 EXPECT_EQ(r[1], L""); 1038 EXPECT_EQ(r[2], L"a"); 1039 EXPECT_EQ(r[3], L""); 1040 r.clear(); 1041 1042 SplitStringDontTrim(L"\t\ta\t", L'\t', &r); 1043 ASSERT_EQ(4U, r.size()); 1044 EXPECT_EQ(r[0], L""); 1045 EXPECT_EQ(r[1], L""); 1046 EXPECT_EQ(r[2], L"a"); 1047 EXPECT_EQ(r[3], L""); 1048 r.clear(); 1049 1050 SplitString(L"\ta\t\nb\tcc", L'\n', &r); 1051 ASSERT_EQ(2U, r.size()); 1052 EXPECT_EQ(r[0], L"a"); 1053 EXPECT_EQ(r[1], L"b\tcc"); 1054 r.clear(); 1055 1056 SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r); 1057 ASSERT_EQ(2U, r.size()); 1058 EXPECT_EQ(r[0], L"\ta\t"); 1059 EXPECT_EQ(r[1], L"b\tcc"); 1060 r.clear(); 1061 } 1062 1063 // Test for Tokenize 1064 TEST(StringUtilTest, Tokenize) { 1065 std::vector<std::string> r; 1066 size_t size; 1067 1068 size = Tokenize("This is a string", " ", &r); 1069 EXPECT_EQ(4U, size); 1070 ASSERT_EQ(4U, r.size()); 1071 EXPECT_EQ(r[0], "This"); 1072 EXPECT_EQ(r[1], "is"); 1073 EXPECT_EQ(r[2], "a"); 1074 EXPECT_EQ(r[3], "string"); 1075 r.clear(); 1076 1077 size = Tokenize("one,two,three", ",", &r); 1078 EXPECT_EQ(3U, size); 1079 ASSERT_EQ(3U, r.size()); 1080 EXPECT_EQ(r[0], "one"); 1081 EXPECT_EQ(r[1], "two"); 1082 EXPECT_EQ(r[2], "three"); 1083 r.clear(); 1084 1085 size = Tokenize("one,two:three;four", ",:", &r); 1086 EXPECT_EQ(3U, size); 1087 ASSERT_EQ(3U, r.size()); 1088 EXPECT_EQ(r[0], "one"); 1089 EXPECT_EQ(r[1], "two"); 1090 EXPECT_EQ(r[2], "three;four"); 1091 r.clear(); 1092 1093 size = Tokenize("one,two:three;four", ";,:", &r); 1094 EXPECT_EQ(4U, size); 1095 ASSERT_EQ(4U, r.size()); 1096 EXPECT_EQ(r[0], "one"); 1097 EXPECT_EQ(r[1], "two"); 1098 EXPECT_EQ(r[2], "three"); 1099 EXPECT_EQ(r[3], "four"); 1100 r.clear(); 1101 1102 size = Tokenize("one, two, three", ",", &r); 1103 EXPECT_EQ(3U, size); 1104 ASSERT_EQ(3U, r.size()); 1105 EXPECT_EQ(r[0], "one"); 1106 EXPECT_EQ(r[1], " two"); 1107 EXPECT_EQ(r[2], " three"); 1108 r.clear(); 1109 1110 size = Tokenize("one, two, three, ", ",", &r); 1111 EXPECT_EQ(4U, size); 1112 ASSERT_EQ(4U, r.size()); 1113 EXPECT_EQ(r[0], "one"); 1114 EXPECT_EQ(r[1], " two"); 1115 EXPECT_EQ(r[2], " three"); 1116 EXPECT_EQ(r[3], " "); 1117 r.clear(); 1118 1119 size = Tokenize("one, two, three,", ",", &r); 1120 EXPECT_EQ(3U, size); 1121 ASSERT_EQ(3U, r.size()); 1122 EXPECT_EQ(r[0], "one"); 1123 EXPECT_EQ(r[1], " two"); 1124 EXPECT_EQ(r[2], " three"); 1125 r.clear(); 1126 1127 size = Tokenize("", ",", &r); 1128 EXPECT_EQ(0U, size); 1129 ASSERT_EQ(0U, r.size()); 1130 r.clear(); 1131 1132 size = Tokenize(",", ",", &r); 1133 EXPECT_EQ(0U, size); 1134 ASSERT_EQ(0U, r.size()); 1135 r.clear(); 1136 1137 size = Tokenize(",;:.", ".:;,", &r); 1138 EXPECT_EQ(0U, size); 1139 ASSERT_EQ(0U, r.size()); 1140 r.clear(); 1141 1142 size = Tokenize("\t\ta\t", "\t", &r); 1143 EXPECT_EQ(1U, size); 1144 ASSERT_EQ(1U, r.size()); 1145 EXPECT_EQ(r[0], "a"); 1146 r.clear(); 1147 1148 size = Tokenize("\ta\t\nb\tcc", "\n", &r); 1149 EXPECT_EQ(2U, size); 1150 ASSERT_EQ(2U, r.size()); 1151 EXPECT_EQ(r[0], "\ta\t"); 1152 EXPECT_EQ(r[1], "b\tcc"); 1153 r.clear(); 1154 } 1155 1156 // Test for JoinString 1157 TEST(StringUtilTest, JoinString) { 1158 std::vector<std::string> in; 1159 EXPECT_EQ("", JoinString(in, ',')); 1160 1161 in.push_back("a"); 1162 EXPECT_EQ("a", JoinString(in, ',')); 1163 1164 in.push_back("b"); 1165 in.push_back("c"); 1166 EXPECT_EQ("a,b,c", JoinString(in, ',')); 1167 1168 in.push_back(""); 1169 EXPECT_EQ("a,b,c,", JoinString(in, ',')); 1170 in.push_back(" "); 1171 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); 1172 } 1173 1174 TEST(StringUtilTest, StartsWith) { 1175 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); 1176 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); 1177 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); 1178 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); 1179 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); 1180 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); 1181 EXPECT_FALSE(StartsWithASCII("", "javascript", false)); 1182 EXPECT_FALSE(StartsWithASCII("", "javascript", true)); 1183 EXPECT_TRUE(StartsWithASCII("java", "", false)); 1184 EXPECT_TRUE(StartsWithASCII("java", "", true)); 1185 1186 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); 1187 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); 1188 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); 1189 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); 1190 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); 1191 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); 1192 EXPECT_FALSE(StartsWith(L"", L"javascript", false)); 1193 EXPECT_FALSE(StartsWith(L"", L"javascript", true)); 1194 EXPECT_TRUE(StartsWith(L"java", L"", false)); 1195 EXPECT_TRUE(StartsWith(L"java", L"", true)); 1196 } 1197 1198 TEST(StringUtilTest, EndsWith) { 1199 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); 1200 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); 1201 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); 1202 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); 1203 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); 1204 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); 1205 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); 1206 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); 1207 EXPECT_FALSE(EndsWith(L"", L".plugin", false)); 1208 EXPECT_FALSE(EndsWith(L"", L".plugin", true)); 1209 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false)); 1210 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true)); 1211 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); 1212 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); 1213 EXPECT_TRUE(EndsWith(L"", L"", false)); 1214 EXPECT_TRUE(EndsWith(L"", L"", true)); 1215 } 1216 1217 TEST(StringUtilTest, GetStringFWithOffsets) { 1218 std::vector<string16> subst; 1219 subst.push_back(ASCIIToUTF16("1")); 1220 subst.push_back(ASCIIToUTF16("2")); 1221 std::vector<size_t> offsets; 1222 1223 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 1224 subst, 1225 &offsets); 1226 EXPECT_EQ(2U, offsets.size()); 1227 EXPECT_EQ(7U, offsets[0]); 1228 EXPECT_EQ(25U, offsets[1]); 1229 offsets.clear(); 1230 1231 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 1232 subst, 1233 &offsets); 1234 EXPECT_EQ(2U, offsets.size()); 1235 EXPECT_EQ(25U, offsets[0]); 1236 EXPECT_EQ(7U, offsets[1]); 1237 offsets.clear(); 1238 } 1239 1240 TEST(StringUtilTest, ReplaceStringPlaceholders) { 1241 std::vector<string16> subst; 1242 subst.push_back(ASCIIToUTF16("9a")); 1243 subst.push_back(ASCIIToUTF16("8b")); 1244 subst.push_back(ASCIIToUTF16("7c")); 1245 subst.push_back(ASCIIToUTF16("6d")); 1246 subst.push_back(ASCIIToUTF16("5e")); 1247 subst.push_back(ASCIIToUTF16("4f")); 1248 subst.push_back(ASCIIToUTF16("3g")); 1249 subst.push_back(ASCIIToUTF16("2h")); 1250 subst.push_back(ASCIIToUTF16("1i")); 1251 1252 string16 formatted = 1253 ReplaceStringPlaceholders( 1254 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); 1255 1256 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); 1257 } 1258 1259 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 1260 // Test whether replacestringplaceholders works as expected when there 1261 // are fewer inputs than outputs. 1262 std::vector<string16> subst; 1263 subst.push_back(ASCIIToUTF16("9a")); 1264 subst.push_back(ASCIIToUTF16("8b")); 1265 subst.push_back(ASCIIToUTF16("7c")); 1266 1267 string16 formatted = 1268 ReplaceStringPlaceholders( 1269 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); 1270 1271 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); 1272 } 1273 1274 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 1275 std::vector<std::string> subst; 1276 subst.push_back("9a"); 1277 subst.push_back("8b"); 1278 subst.push_back("7c"); 1279 subst.push_back("6d"); 1280 subst.push_back("5e"); 1281 subst.push_back("4f"); 1282 subst.push_back("3g"); 1283 subst.push_back("2h"); 1284 subst.push_back("1i"); 1285 1286 std::string formatted = 1287 ReplaceStringPlaceholders( 1288 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); 1289 1290 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); 1291 } 1292 1293 TEST(StringUtilTest, SplitStringAlongWhitespace) { 1294 struct TestData { 1295 const std::wstring input; 1296 const size_t expected_result_count; 1297 const std::wstring output1; 1298 const std::wstring output2; 1299 } data[] = { 1300 { L"a", 1, L"a", L"" }, 1301 { L" ", 0, L"", L"" }, 1302 { L" a", 1, L"a", L"" }, 1303 { L" ab ", 1, L"ab", L"" }, 1304 { L" ab c", 2, L"ab", L"c" }, 1305 { L" ab c ", 2, L"ab", L"c" }, 1306 { L" ab cd", 2, L"ab", L"cd" }, 1307 { L" ab cd ", 2, L"ab", L"cd" }, 1308 { L" \ta\t", 1, L"a", L"" }, 1309 { L" b\ta\t", 2, L"b", L"a" }, 1310 { L" b\tat", 2, L"b", L"at" }, 1311 { L"b\tat", 2, L"b", L"at" }, 1312 { L"b\t at", 2, L"b", L"at" }, 1313 }; 1314 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 1315 std::vector<std::wstring> results; 1316 SplitStringAlongWhitespace(data[i].input, &results); 1317 ASSERT_EQ(data[i].expected_result_count, results.size()); 1318 if (data[i].expected_result_count > 0) 1319 ASSERT_EQ(data[i].output1, results[0]); 1320 if (data[i].expected_result_count > 1) 1321 ASSERT_EQ(data[i].output2, results[1]); 1322 } 1323 } 1324 1325 TEST(StringUtilTest, MatchPatternTest) { 1326 EXPECT_EQ(MatchPatternASCII("www.google.com", "*.com"), true); 1327 EXPECT_EQ(MatchPatternASCII("www.google.com", "*"), true); 1328 EXPECT_EQ(MatchPatternASCII("www.google.com", "www*.g*.org"), false); 1329 EXPECT_EQ(MatchPatternASCII("Hello", "H?l?o"), true); 1330 EXPECT_EQ(MatchPatternASCII("www.google.com", "http://*)"), false); 1331 EXPECT_EQ(MatchPatternASCII("www.msn.com", "*.COM"), false); 1332 EXPECT_EQ(MatchPatternASCII("Hello*1234", "He??o\\*1*"), true); 1333 EXPECT_EQ(MatchPatternASCII("", "*.*"), false); 1334 EXPECT_EQ(MatchPatternASCII("", "*"), true); 1335 EXPECT_EQ(MatchPatternASCII("", "?"), true); 1336 EXPECT_EQ(MatchPatternASCII("", ""), true); 1337 EXPECT_EQ(MatchPatternASCII("Hello", ""), false); 1338 EXPECT_EQ(MatchPatternASCII("Hello*", "Hello*"), true); 1339 // Stop after a certain recursion depth. 1340 EXPECT_EQ(MatchPatternASCII("12345678901234567890", "???????????????????*"), 1341 false); 1342 } 1343 1344 TEST(StringUtilTest, LcpyTest) { 1345 // Test the normal case where we fit in our buffer. 1346 { 1347 char dst[10]; 1348 wchar_t wdst[10]; 1349 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1350 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1351 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1352 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1353 } 1354 1355 // Test dst_size == 0, nothing should be written to |dst| and we should 1356 // have the equivalent of strlen(src). 1357 { 1358 char dst[2] = {1, 2}; 1359 wchar_t wdst[2] = {1, 2}; 1360 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); 1361 EXPECT_EQ(1, dst[0]); 1362 EXPECT_EQ(2, dst[1]); 1363 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); 1364 #if defined(WCHAR_T_IS_UNSIGNED) 1365 EXPECT_EQ(1U, wdst[0]); 1366 EXPECT_EQ(2U, wdst[1]); 1367 #else 1368 EXPECT_EQ(1, wdst[0]); 1369 EXPECT_EQ(2, wdst[1]); 1370 #endif 1371 } 1372 1373 // Test the case were we _just_ competely fit including the null. 1374 { 1375 char dst[8]; 1376 wchar_t wdst[8]; 1377 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1378 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1379 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1380 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1381 } 1382 1383 // Test the case were we we are one smaller, so we can't fit the null. 1384 { 1385 char dst[7]; 1386 wchar_t wdst[7]; 1387 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1388 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 1389 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1390 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 1391 } 1392 1393 // Test the case were we are just too small. 1394 { 1395 char dst[3]; 1396 wchar_t wdst[3]; 1397 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1398 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 1399 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1400 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 1401 } 1402 } 1403 1404 TEST(StringUtilTest, WprintfFormatPortabilityTest) { 1405 struct TestData { 1406 const wchar_t* input; 1407 bool portable; 1408 } cases[] = { 1409 { L"%ls", true }, 1410 { L"%s", false }, 1411 { L"%S", false }, 1412 { L"%lS", false }, 1413 { L"Hello, %s", false }, 1414 { L"%lc", true }, 1415 { L"%c", false }, 1416 { L"%C", false }, 1417 { L"%lC", false }, 1418 { L"%ls %s", false }, 1419 { L"%s %ls", false }, 1420 { L"%s %ls %s", false }, 1421 { L"%f", true }, 1422 { L"%f %F", false }, 1423 { L"%d %D", false }, 1424 { L"%o %O", false }, 1425 { L"%u %U", false }, 1426 { L"%f %d %o %u", true }, 1427 { L"%-8d (%02.1f%)", true }, 1428 { L"% 10s", false }, 1429 { L"% 10ls", true } 1430 }; 1431 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1432 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); 1433 } 1434 } 1435 1436 TEST(StringUtilTest, ElideString) { 1437 struct TestData { 1438 const wchar_t* input; 1439 int max_len; 1440 bool result; 1441 const wchar_t* output; 1442 } cases[] = { 1443 { L"Hello", 0, true, L"" }, 1444 { L"", 0, false, L"" }, 1445 { L"Hello, my name is Tom", 1, true, L"H" }, 1446 { L"Hello, my name is Tom", 2, true, L"He" }, 1447 { L"Hello, my name is Tom", 3, true, L"H.m" }, 1448 { L"Hello, my name is Tom", 4, true, L"H..m" }, 1449 { L"Hello, my name is Tom", 5, true, L"H...m" }, 1450 { L"Hello, my name is Tom", 6, true, L"He...m" }, 1451 { L"Hello, my name is Tom", 7, true, L"He...om" }, 1452 { L"Hello, my name is Tom", 10, true, L"Hell...Tom" }, 1453 { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" } 1454 }; 1455 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1456 std::wstring output; 1457 EXPECT_EQ(cases[i].result, 1458 ElideString(cases[i].input, cases[i].max_len, &output)); 1459 EXPECT_TRUE(output == cases[i].output); 1460 } 1461 } 1462 1463 TEST(StringUtilTest, HexEncode) { 1464 std::string hex(HexEncode(NULL, 0)); 1465 EXPECT_EQ(hex.length(), 0U); 1466 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81}; 1467 hex = HexEncode(bytes, sizeof(bytes)); 1468 EXPECT_EQ(hex.compare("01FF02FE038081"), 0); 1469 } 1470 1471 } // namaspace base 1472