1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <math.h> 6 #include <stdarg.h> 7 8 #include <limits> 9 #include <sstream> 10 11 #include "base/basictypes.h" 12 #include "base/string_util.h" 13 #include "base/utf_string_conversions.h" 14 #include "testing/gmock/include/gmock/gmock.h" 15 #include "testing/gtest/include/gtest/gtest.h" 16 17 using ::testing::ElementsAre; 18 19 namespace base { 20 21 static const struct trim_case { 22 const wchar_t* input; 23 const TrimPositions positions; 24 const wchar_t* output; 25 const TrimPositions return_value; 26 } trim_cases[] = { 27 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 28 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 29 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 30 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 31 {L"", TRIM_ALL, L"", TRIM_NONE}, 32 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 33 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 34 {L" ", TRIM_ALL, L"", TRIM_ALL}, 35 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 36 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 37 }; 38 39 static const struct trim_case_ascii { 40 const char* input; 41 const TrimPositions positions; 42 const char* output; 43 const TrimPositions return_value; 44 } trim_cases_ascii[] = { 45 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 46 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 47 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 48 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 49 {"", TRIM_ALL, "", TRIM_NONE}, 50 {" ", TRIM_LEADING, "", TRIM_LEADING}, 51 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 52 {" ", TRIM_ALL, "", TRIM_ALL}, 53 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 54 }; 55 56 namespace { 57 58 // Helper used to test TruncateUTF8ToByteSize. 59 bool Truncated(const std::string& input, const size_t byte_size, 60 std::string* output) { 61 size_t prev = input.length(); 62 TruncateUTF8ToByteSize(input, byte_size, output); 63 return prev != output->length(); 64 } 65 66 } // namespace 67 68 TEST(StringUtilTest, TruncateUTF8ToByteSize) { 69 std::string output; 70 71 // Empty strings and invalid byte_size arguments 72 EXPECT_FALSE(Truncated("", 0, &output)); 73 EXPECT_EQ(output, ""); 74 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); 75 EXPECT_EQ(output, ""); 76 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output)); 77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); 78 79 // Testing the truncation of valid UTF8 correctly 80 EXPECT_TRUE(Truncated("abc", 2, &output)); 81 EXPECT_EQ(output, "ab"); 82 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); 83 EXPECT_EQ(output.compare("\xc2\x81"), 0); 84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); 85 EXPECT_EQ(output.compare("\xc2\x81"), 0); 86 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); 87 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); 88 89 { 90 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; 91 const std::string array_string(array, arraysize(array)); 92 EXPECT_TRUE(Truncated(array_string, 4, &output)); 93 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); 94 } 95 96 { 97 const char array[] = "\x00\xc2\x81\xc2\x81"; 98 const std::string array_string(array, arraysize(array)); 99 EXPECT_TRUE(Truncated(array_string, 4, &output)); 100 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); 101 } 102 103 // Testing invalid UTF8 104 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); 105 EXPECT_EQ(output.compare(""), 0); 106 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); 107 EXPECT_EQ(output.compare(""), 0); 108 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); 109 EXPECT_EQ(output.compare(""), 0); 110 111 // Testing invalid UTF8 mixed with valid UTF8 112 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); 113 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); 114 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); 115 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); 116 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 117 10, &output)); 118 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); 119 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 120 10, &output)); 121 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); 122 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); 123 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); 124 125 // Overlong sequences 126 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); 127 EXPECT_EQ(output.compare(""), 0); 128 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); 129 EXPECT_EQ(output.compare(""), 0); 130 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); 131 EXPECT_EQ(output.compare(""), 0); 132 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); 133 EXPECT_EQ(output.compare(""), 0); 134 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); 135 EXPECT_EQ(output.compare(""), 0); 136 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); 137 EXPECT_EQ(output.compare(""), 0); 138 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); 139 EXPECT_EQ(output.compare(""), 0); 140 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); 141 EXPECT_EQ(output.compare(""), 0); 142 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); 143 EXPECT_EQ(output.compare(""), 0); 144 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); 145 EXPECT_EQ(output.compare(""), 0); 146 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); 147 EXPECT_EQ(output.compare(""), 0); 148 149 // Beyond U+10FFFF (the upper limit of Unicode codespace) 150 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); 151 EXPECT_EQ(output.compare(""), 0); 152 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); 153 EXPECT_EQ(output.compare(""), 0); 154 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); 155 EXPECT_EQ(output.compare(""), 0); 156 157 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 158 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); 159 EXPECT_EQ(output.compare(""), 0); 160 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); 161 EXPECT_EQ(output.compare(""), 0); 162 163 { 164 const char array[] = "\x00\x00\xfe\xff"; 165 const std::string array_string(array, arraysize(array)); 166 EXPECT_TRUE(Truncated(array_string, 4, &output)); 167 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); 168 } 169 170 // Variants on the previous test 171 { 172 const char array[] = "\xff\xfe\x00\x00"; 173 const std::string array_string(array, 4); 174 EXPECT_FALSE(Truncated(array_string, 4, &output)); 175 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); 176 } 177 { 178 const char array[] = "\xff\x00\x00\xfe"; 179 const std::string array_string(array, arraysize(array)); 180 EXPECT_TRUE(Truncated(array_string, 4, &output)); 181 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); 182 } 183 184 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 185 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); 186 EXPECT_EQ(output.compare(""), 0); 187 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); 188 EXPECT_EQ(output.compare(""), 0); 189 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); 190 EXPECT_EQ(output.compare(""), 0); 191 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); 192 EXPECT_EQ(output.compare(""), 0); 193 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); 194 EXPECT_EQ(output.compare(""), 0); 195 196 // Strings in legacy encodings that are valid in UTF-8, but 197 // are invalid as UTF-8 in real data. 198 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); 199 EXPECT_EQ(output.compare("caf"), 0); 200 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); 201 EXPECT_EQ(output.compare(""), 0); 202 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); 203 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 204 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, 205 &output)); 206 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 207 208 // Testing using the same string as input and output. 209 EXPECT_FALSE(Truncated(output, 4, &output)); 210 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 211 EXPECT_TRUE(Truncated(output, 3, &output)); 212 EXPECT_EQ(output.compare("\xa7\x41"), 0); 213 214 // "abc" with U+201[CD] in windows-125[0-8] 215 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); 216 EXPECT_EQ(output.compare("\x93" "abc"), 0); 217 218 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 219 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); 220 EXPECT_EQ(output.compare(""), 0); 221 222 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 223 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); 224 EXPECT_EQ(output.compare(""), 0); 225 } 226 227 TEST(StringUtilTest, TrimWhitespace) { 228 std::wstring output; // Allow contents to carry over to next testcase 229 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 230 const trim_case& value = trim_cases[i]; 231 EXPECT_EQ(value.return_value, 232 TrimWhitespace(value.input, value.positions, &output)); 233 EXPECT_EQ(value.output, output); 234 } 235 236 // Test that TrimWhitespace() can take the same string for input and output 237 output = L" This is a test \r\n"; 238 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 239 EXPECT_EQ(L"This is a test", output); 240 241 // Once more, but with a string of whitespace 242 output = L" \r\n"; 243 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 244 EXPECT_EQ(L"", output); 245 246 std::string output_ascii; 247 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 248 const trim_case_ascii& value = trim_cases_ascii[i]; 249 EXPECT_EQ(value.return_value, 250 TrimWhitespace(value.input, value.positions, &output_ascii)); 251 EXPECT_EQ(value.output, output_ascii); 252 } 253 } 254 255 static const struct collapse_case { 256 const wchar_t* input; 257 const bool trim; 258 const wchar_t* output; 259 } collapse_cases[] = { 260 {L" Google Video ", false, L"Google Video"}, 261 {L"Google Video", false, L"Google Video"}, 262 {L"", false, L""}, 263 {L" ", false, L""}, 264 {L"\t\rTest String\n", false, L"Test String"}, 265 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 266 {L" Test \n \t String ", false, L"Test String"}, 267 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 268 {L" Test String", false, L"Test String"}, 269 {L"Test String ", false, L"Test String"}, 270 {L"Test String", false, L"Test String"}, 271 {L"", true, L""}, 272 {L"\n", true, L""}, 273 {L" \r ", true, L""}, 274 {L"\nFoo", true, L"Foo"}, 275 {L"\r Foo ", true, L"Foo"}, 276 {L" Foo bar ", true, L"Foo bar"}, 277 {L" \tFoo bar \n", true, L"Foo bar"}, 278 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 279 }; 280 281 TEST(StringUtilTest, CollapseWhitespace) { 282 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 283 const collapse_case& value = collapse_cases[i]; 284 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); 285 } 286 } 287 288 static const struct collapse_case_ascii { 289 const char* input; 290 const bool trim; 291 const char* output; 292 } collapse_cases_ascii[] = { 293 {" Google Video ", false, "Google Video"}, 294 {"Google Video", false, "Google Video"}, 295 {"", false, ""}, 296 {" ", false, ""}, 297 {"\t\rTest String\n", false, "Test String"}, 298 {" Test \n \t String ", false, "Test String"}, 299 {" Test String", false, "Test String"}, 300 {"Test String ", false, "Test String"}, 301 {"Test String", false, "Test String"}, 302 {"", true, ""}, 303 {"\n", true, ""}, 304 {" \r ", true, ""}, 305 {"\nFoo", true, "Foo"}, 306 {"\r Foo ", true, "Foo"}, 307 {" Foo bar ", true, "Foo bar"}, 308 {" \tFoo bar \n", true, "Foo bar"}, 309 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 310 }; 311 312 TEST(StringUtilTest, CollapseWhitespaceASCII) { 313 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 314 const collapse_case_ascii& value = collapse_cases_ascii[i]; 315 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 316 } 317 } 318 319 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) { 320 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("")); 321 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" ")); 322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t")); 323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n ")); 324 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a")); 325 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n ")); 326 } 327 328 TEST(StringUtilTest, ContainsOnlyWhitespace) { 329 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(""))); 330 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" "))); 331 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t"))); 332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n "))); 333 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a"))); 334 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n "))); 335 } 336 337 TEST(StringUtilTest, IsStringUTF8) { 338 EXPECT_TRUE(IsStringUTF8("abc")); 339 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 340 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 341 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 342 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 343 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 344 345 // surrogate code points 346 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 347 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 348 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 349 350 // overlong sequences 351 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 352 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 353 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 354 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 355 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 356 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 357 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 359 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 360 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 361 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 362 363 // Beyond U+10FFFF (the upper limit of Unicode codespace) 364 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 365 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 366 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 367 368 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 369 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 370 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 371 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 372 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 373 374 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 375 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 376 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 377 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 378 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 379 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 380 // Strings in legacy encodings. We can certainly make up strings 381 // in a legacy encoding that are valid in UTF-8, but in real data, 382 // most of them are invalid as UTF-8. 383 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 384 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 385 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 386 // "abc" with U+201[CD] in windows-125[0-8] 387 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 388 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 389 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 390 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 391 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 392 393 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 394 // representation, and the second uses a 2-byte sequence. The second version 395 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 396 // given codepoint must be used. 397 static const char kEmbeddedNull[] = "embedded\0null"; 398 EXPECT_TRUE(IsStringUTF8( 399 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 400 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 401 } 402 403 TEST(StringUtilTest, ConvertASCII) { 404 static const char* char_cases[] = { 405 "Google Video", 406 "Hello, world\n", 407 "0123ABCDwxyz \a\b\t\r\n!+,.~" 408 }; 409 410 static const wchar_t* const wchar_cases[] = { 411 L"Google Video", 412 L"Hello, world\n", 413 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 414 }; 415 416 for (size_t i = 0; i < arraysize(char_cases); ++i) { 417 EXPECT_TRUE(IsStringASCII(char_cases[i])); 418 std::wstring wide = ASCIIToWide(char_cases[i]); 419 EXPECT_EQ(wchar_cases[i], wide); 420 421 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); 422 std::string ascii = WideToASCII(wchar_cases[i]); 423 EXPECT_EQ(char_cases[i], ascii); 424 } 425 426 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 427 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); 428 429 // Convert empty strings. 430 std::wstring wempty; 431 std::string empty; 432 EXPECT_EQ(empty, WideToASCII(wempty)); 433 EXPECT_EQ(wempty, ASCIIToWide(empty)); 434 435 // Convert strings with an embedded NUL character. 436 const char chars_with_nul[] = "test\0string"; 437 const int length_with_nul = arraysize(chars_with_nul) - 1; 438 std::string string_with_nul(chars_with_nul, length_with_nul); 439 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); 440 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), 441 wide_with_nul.length()); 442 std::string narrow_with_nul = WideToASCII(wide_with_nul); 443 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 444 narrow_with_nul.length()); 445 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 446 } 447 448 TEST(StringUtilTest, ToUpperASCII) { 449 EXPECT_EQ('C', ToUpperASCII('C')); 450 EXPECT_EQ('C', ToUpperASCII('c')); 451 EXPECT_EQ('2', ToUpperASCII('2')); 452 453 EXPECT_EQ(L'C', ToUpperASCII(L'C')); 454 EXPECT_EQ(L'C', ToUpperASCII(L'c')); 455 EXPECT_EQ(L'2', ToUpperASCII(L'2')); 456 457 std::string in_place_a("Cc2"); 458 StringToUpperASCII(&in_place_a); 459 EXPECT_EQ("CC2", in_place_a); 460 461 std::wstring in_place_w(L"Cc2"); 462 StringToUpperASCII(&in_place_w); 463 EXPECT_EQ(L"CC2", in_place_w); 464 465 std::string original_a("Cc2"); 466 std::string upper_a = StringToUpperASCII(original_a); 467 EXPECT_EQ("CC2", upper_a); 468 469 std::wstring original_w(L"Cc2"); 470 std::wstring upper_w = StringToUpperASCII(original_w); 471 EXPECT_EQ(L"CC2", upper_w); 472 } 473 474 static const struct { 475 const wchar_t* src_w; 476 const char* src_a; 477 const char* dst; 478 } lowercase_cases[] = { 479 {L"FoO", "FoO", "foo"}, 480 {L"foo", "foo", "foo"}, 481 {L"FOO", "FOO", "foo"}, 482 }; 483 484 TEST(StringUtilTest, LowerCaseEqualsASCII) { 485 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { 486 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, 487 lowercase_cases[i].dst)); 488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 489 lowercase_cases[i].dst)); 490 } 491 } 492 493 TEST(StringUtilTest, GetByteDisplayUnits) { 494 static const struct { 495 int64 bytes; 496 DataUnits expected; 497 } cases[] = { 498 {0, DATA_UNITS_BYTE}, 499 {512, DATA_UNITS_BYTE}, 500 {10*1024, DATA_UNITS_KIBIBYTE}, 501 {10*1024*1024, DATA_UNITS_MEBIBYTE}, 502 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE}, 503 {~(1LL<<63), DATA_UNITS_GIBIBYTE}, 504 #ifdef NDEBUG 505 {-1, DATA_UNITS_BYTE}, 506 #endif 507 }; 508 509 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 510 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); 511 } 512 513 TEST(StringUtilTest, FormatBytes) { 514 static const struct { 515 int64 bytes; 516 DataUnits units; 517 const char* expected; 518 const char* expected_with_units; 519 } cases[] = { 520 // Expected behavior: we show one post-decimal digit when we have 521 // under two pre-decimal digits, except in cases where it makes no 522 // sense (zero or bytes). 523 // Since we switch units once we cross the 1000 mark, this keeps 524 // the display of file sizes or bytes consistently around three 525 // digits. 526 {0, DATA_UNITS_BYTE, "0", "0 B"}, 527 {512, DATA_UNITS_BYTE, "512", "512 B"}, 528 {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"}, 529 {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"}, 530 {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"}, 531 {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"}, 532 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"}, 533 {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"}, 534 {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"}, 535 {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE, 536 "105", "105 GB"}, 537 {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"}, 538 539 {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"}, 540 {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"}, 541 {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"}, 542 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE, 543 "1.9", "1.9 GB"}, 544 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"}, 545 {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"}, 546 #ifdef NDEBUG 547 {-1, DATA_UNITS_BYTE, "", ""}, 548 #endif 549 }; 550 551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 552 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), 553 FormatBytes(cases[i].bytes, cases[i].units, false)); 554 EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units), 555 FormatBytes(cases[i].bytes, cases[i].units, true)); 556 } 557 } 558 559 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 560 static const struct { 561 const char* str; 562 string16::size_type start_offset; 563 const char* find_this; 564 const char* replace_with; 565 const char* expected; 566 } cases[] = { 567 {"aaa", 0, "a", "b", "bbb"}, 568 {"abb", 0, "ab", "a", "ab"}, 569 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 570 {"Not found", 0, "x", "0", "Not found"}, 571 {"Not found again", 5, "x", "0", "Not found again"}, 572 {" Making it much longer ", 0, " ", "Four score and seven years ago", 573 "Four score and seven years agoMakingFour score and seven years agoit" 574 "Four score and seven years agomuchFour score and seven years agolonger" 575 "Four score and seven years ago"}, 576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 577 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 578 {"abababab", 2, "ab", "c", "abccc"}, 579 }; 580 581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 582 string16 str = ASCIIToUTF16(cases[i].str); 583 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 584 ASCIIToUTF16(cases[i].find_this), 585 ASCIIToUTF16(cases[i].replace_with)); 586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 587 } 588 } 589 590 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 591 static const struct { 592 const char* str; 593 string16::size_type start_offset; 594 const char* find_this; 595 const char* replace_with; 596 const char* expected; 597 } cases[] = { 598 {"aaa", 0, "a", "b", "baa"}, 599 {"abb", 0, "ab", "a", "ab"}, 600 {"Removing some substrings inging", 0, "ing", "", 601 "Remov some substrings inging"}, 602 {"Not found", 0, "x", "0", "Not found"}, 603 {"Not found again", 5, "x", "0", "Not found again"}, 604 {" Making it much longer ", 0, " ", "Four score and seven years ago", 605 "Four score and seven years agoMaking it much longer "}, 606 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 607 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 608 {"abababab", 2, "ab", "c", "abcabab"}, 609 }; 610 611 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 612 string16 str = ASCIIToUTF16(cases[i].str); 613 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 614 ASCIIToUTF16(cases[i].find_this), 615 ASCIIToUTF16(cases[i].replace_with)); 616 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 617 } 618 } 619 620 TEST(StringUtilTest, HexDigitToInt) { 621 EXPECT_EQ(0, HexDigitToInt('0')); 622 EXPECT_EQ(1, HexDigitToInt('1')); 623 EXPECT_EQ(2, HexDigitToInt('2')); 624 EXPECT_EQ(3, HexDigitToInt('3')); 625 EXPECT_EQ(4, HexDigitToInt('4')); 626 EXPECT_EQ(5, HexDigitToInt('5')); 627 EXPECT_EQ(6, HexDigitToInt('6')); 628 EXPECT_EQ(7, HexDigitToInt('7')); 629 EXPECT_EQ(8, HexDigitToInt('8')); 630 EXPECT_EQ(9, HexDigitToInt('9')); 631 EXPECT_EQ(10, HexDigitToInt('A')); 632 EXPECT_EQ(11, HexDigitToInt('B')); 633 EXPECT_EQ(12, HexDigitToInt('C')); 634 EXPECT_EQ(13, HexDigitToInt('D')); 635 EXPECT_EQ(14, HexDigitToInt('E')); 636 EXPECT_EQ(15, HexDigitToInt('F')); 637 638 // Verify the lower case as well. 639 EXPECT_EQ(10, HexDigitToInt('a')); 640 EXPECT_EQ(11, HexDigitToInt('b')); 641 EXPECT_EQ(12, HexDigitToInt('c')); 642 EXPECT_EQ(13, HexDigitToInt('d')); 643 EXPECT_EQ(14, HexDigitToInt('e')); 644 EXPECT_EQ(15, HexDigitToInt('f')); 645 } 646 647 // This checks where we can use the assignment operator for a va_list. We need 648 // a way to do this since Visual C doesn't support va_copy, but assignment on 649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this 650 // capability. 651 static void VariableArgsFunc(const char* format, ...) { 652 va_list org; 653 va_start(org, format); 654 655 va_list dup; 656 GG_VA_COPY(dup, org); 657 int i1 = va_arg(org, int); 658 int j1 = va_arg(org, int); 659 char* s1 = va_arg(org, char*); 660 double d1 = va_arg(org, double); 661 va_end(org); 662 663 int i2 = va_arg(dup, int); 664 int j2 = va_arg(dup, int); 665 char* s2 = va_arg(dup, char*); 666 double d2 = va_arg(dup, double); 667 668 EXPECT_EQ(i1, i2); 669 EXPECT_EQ(j1, j2); 670 EXPECT_STREQ(s1, s2); 671 EXPECT_EQ(d1, d2); 672 673 va_end(dup); 674 } 675 676 TEST(StringUtilTest, VAList) { 677 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); 678 } 679 680 // Test for Tokenize 681 template <typename STR> 682 void TokenizeTest() { 683 std::vector<STR> r; 684 size_t size; 685 686 size = Tokenize(STR("This is a string"), STR(" "), &r); 687 EXPECT_EQ(4U, size); 688 ASSERT_EQ(4U, r.size()); 689 EXPECT_EQ(r[0], STR("This")); 690 EXPECT_EQ(r[1], STR("is")); 691 EXPECT_EQ(r[2], STR("a")); 692 EXPECT_EQ(r[3], STR("string")); 693 r.clear(); 694 695 size = Tokenize(STR("one,two,three"), STR(","), &r); 696 EXPECT_EQ(3U, size); 697 ASSERT_EQ(3U, r.size()); 698 EXPECT_EQ(r[0], STR("one")); 699 EXPECT_EQ(r[1], STR("two")); 700 EXPECT_EQ(r[2], STR("three")); 701 r.clear(); 702 703 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r); 704 EXPECT_EQ(3U, size); 705 ASSERT_EQ(3U, r.size()); 706 EXPECT_EQ(r[0], STR("one")); 707 EXPECT_EQ(r[1], STR("two")); 708 EXPECT_EQ(r[2], STR("three;four")); 709 r.clear(); 710 711 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r); 712 EXPECT_EQ(4U, size); 713 ASSERT_EQ(4U, r.size()); 714 EXPECT_EQ(r[0], STR("one")); 715 EXPECT_EQ(r[1], STR("two")); 716 EXPECT_EQ(r[2], STR("three")); 717 EXPECT_EQ(r[3], STR("four")); 718 r.clear(); 719 720 size = Tokenize(STR("one, two, three"), STR(","), &r); 721 EXPECT_EQ(3U, size); 722 ASSERT_EQ(3U, r.size()); 723 EXPECT_EQ(r[0], STR("one")); 724 EXPECT_EQ(r[1], STR(" two")); 725 EXPECT_EQ(r[2], STR(" three")); 726 r.clear(); 727 728 size = Tokenize(STR("one, two, three, "), STR(","), &r); 729 EXPECT_EQ(4U, size); 730 ASSERT_EQ(4U, r.size()); 731 EXPECT_EQ(r[0], STR("one")); 732 EXPECT_EQ(r[1], STR(" two")); 733 EXPECT_EQ(r[2], STR(" three")); 734 EXPECT_EQ(r[3], STR(" ")); 735 r.clear(); 736 737 size = Tokenize(STR("one, two, three,"), STR(","), &r); 738 EXPECT_EQ(3U, size); 739 ASSERT_EQ(3U, r.size()); 740 EXPECT_EQ(r[0], STR("one")); 741 EXPECT_EQ(r[1], STR(" two")); 742 EXPECT_EQ(r[2], STR(" three")); 743 r.clear(); 744 745 size = Tokenize(STR(""), STR(","), &r); 746 EXPECT_EQ(0U, size); 747 ASSERT_EQ(0U, r.size()); 748 r.clear(); 749 750 size = Tokenize(STR(","), STR(","), &r); 751 EXPECT_EQ(0U, size); 752 ASSERT_EQ(0U, r.size()); 753 r.clear(); 754 755 size = Tokenize(STR(",;:."), STR(".:;,"), &r); 756 EXPECT_EQ(0U, size); 757 ASSERT_EQ(0U, r.size()); 758 r.clear(); 759 760 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r); 761 EXPECT_EQ(1U, size); 762 ASSERT_EQ(1U, r.size()); 763 EXPECT_EQ(r[0], STR("a")); 764 r.clear(); 765 766 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r); 767 EXPECT_EQ(2U, size); 768 ASSERT_EQ(2U, r.size()); 769 EXPECT_EQ(r[0], STR("\ta\t")); 770 EXPECT_EQ(r[1], STR("b\tcc")); 771 r.clear(); 772 } 773 774 TEST(StringUtilTest, TokenizeStdString) { 775 TokenizeTest<std::string>(); 776 } 777 778 TEST(StringUtilTest, TokenizeStringPiece) { 779 TokenizeTest<base::StringPiece>(); 780 } 781 782 // Test for JoinString 783 TEST(StringUtilTest, JoinString) { 784 std::vector<std::string> in; 785 EXPECT_EQ("", JoinString(in, ',')); 786 787 in.push_back("a"); 788 EXPECT_EQ("a", JoinString(in, ',')); 789 790 in.push_back("b"); 791 in.push_back("c"); 792 EXPECT_EQ("a,b,c", JoinString(in, ',')); 793 794 in.push_back(""); 795 EXPECT_EQ("a,b,c,", JoinString(in, ',')); 796 in.push_back(" "); 797 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); 798 } 799 800 TEST(StringUtilTest, StartsWith) { 801 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); 802 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); 803 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); 804 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); 805 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); 806 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); 807 EXPECT_FALSE(StartsWithASCII("", "javascript", false)); 808 EXPECT_FALSE(StartsWithASCII("", "javascript", true)); 809 EXPECT_TRUE(StartsWithASCII("java", "", false)); 810 EXPECT_TRUE(StartsWithASCII("java", "", true)); 811 812 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); 813 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); 814 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); 815 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); 816 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); 817 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); 818 EXPECT_FALSE(StartsWith(L"", L"javascript", false)); 819 EXPECT_FALSE(StartsWith(L"", L"javascript", true)); 820 EXPECT_TRUE(StartsWith(L"java", L"", false)); 821 EXPECT_TRUE(StartsWith(L"java", L"", true)); 822 } 823 824 TEST(StringUtilTest, EndsWith) { 825 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); 826 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); 827 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); 828 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); 829 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); 830 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); 831 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); 832 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); 833 EXPECT_FALSE(EndsWith(L"", L".plugin", false)); 834 EXPECT_FALSE(EndsWith(L"", L".plugin", true)); 835 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false)); 836 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true)); 837 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); 838 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); 839 EXPECT_TRUE(EndsWith(L"", L"", false)); 840 EXPECT_TRUE(EndsWith(L"", L"", true)); 841 } 842 843 TEST(StringUtilTest, GetStringFWithOffsets) { 844 std::vector<string16> subst; 845 subst.push_back(ASCIIToUTF16("1")); 846 subst.push_back(ASCIIToUTF16("2")); 847 std::vector<size_t> offsets; 848 849 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 850 subst, 851 &offsets); 852 EXPECT_EQ(2U, offsets.size()); 853 EXPECT_EQ(7U, offsets[0]); 854 EXPECT_EQ(25U, offsets[1]); 855 offsets.clear(); 856 857 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 858 subst, 859 &offsets); 860 EXPECT_EQ(2U, offsets.size()); 861 EXPECT_EQ(25U, offsets[0]); 862 EXPECT_EQ(7U, offsets[1]); 863 offsets.clear(); 864 } 865 866 TEST(StringUtilTest, ReplaceStringPlaceholders) { 867 std::vector<string16> subst; 868 subst.push_back(ASCIIToUTF16("9a")); 869 subst.push_back(ASCIIToUTF16("8b")); 870 subst.push_back(ASCIIToUTF16("7c")); 871 subst.push_back(ASCIIToUTF16("6d")); 872 subst.push_back(ASCIIToUTF16("5e")); 873 subst.push_back(ASCIIToUTF16("4f")); 874 subst.push_back(ASCIIToUTF16("3g")); 875 subst.push_back(ASCIIToUTF16("2h")); 876 subst.push_back(ASCIIToUTF16("1i")); 877 878 string16 formatted = 879 ReplaceStringPlaceholders( 880 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); 881 882 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); 883 } 884 885 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 886 // Test whether replacestringplaceholders works as expected when there 887 // are fewer inputs than outputs. 888 std::vector<string16> subst; 889 subst.push_back(ASCIIToUTF16("9a")); 890 subst.push_back(ASCIIToUTF16("8b")); 891 subst.push_back(ASCIIToUTF16("7c")); 892 893 string16 formatted = 894 ReplaceStringPlaceholders( 895 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); 896 897 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); 898 } 899 900 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 901 std::vector<std::string> subst; 902 subst.push_back("9a"); 903 subst.push_back("8b"); 904 subst.push_back("7c"); 905 subst.push_back("6d"); 906 subst.push_back("5e"); 907 subst.push_back("4f"); 908 subst.push_back("3g"); 909 subst.push_back("2h"); 910 subst.push_back("1i"); 911 912 std::string formatted = 913 ReplaceStringPlaceholders( 914 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); 915 916 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); 917 } 918 919 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { 920 std::vector<std::string> subst; 921 subst.push_back("a"); 922 subst.push_back("b"); 923 subst.push_back("c"); 924 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), 925 "$1 $$2 $$$3"); 926 } 927 928 TEST(StringUtilTest, MatchPatternTest) { 929 EXPECT_TRUE(MatchPattern("www.google.com", "*.com")); 930 EXPECT_TRUE(MatchPattern("www.google.com", "*")); 931 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org")); 932 EXPECT_TRUE(MatchPattern("Hello", "H?l?o")); 933 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)")); 934 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM")); 935 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*")); 936 EXPECT_FALSE(MatchPattern("", "*.*")); 937 EXPECT_TRUE(MatchPattern("", "*")); 938 EXPECT_TRUE(MatchPattern("", "?")); 939 EXPECT_TRUE(MatchPattern("", "")); 940 EXPECT_FALSE(MatchPattern("Hello", "")); 941 EXPECT_TRUE(MatchPattern("Hello*", "Hello*")); 942 // Stop after a certain recursion depth. 943 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*")); 944 945 // Test UTF8 matching. 946 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0")); 947 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?.")); 948 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*")); 949 // Invalid sequences should be handled as a single invalid character. 950 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?")); 951 // If the pattern has invalid characters, it shouldn't match anything. 952 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); 953 954 // Test UTF16 character matching. 955 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), 956 UTF8ToUTF16("*.com"))); 957 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), 958 UTF8ToUTF16("He??o\\*1*"))); 959 960 // This test verifies that consecutive wild cards are collapsed into 1 961 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum 962 // recursion depth). 963 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"), 964 UTF8ToUTF16("He********************************o"))); 965 } 966 967 TEST(StringUtilTest, LcpyTest) { 968 // Test the normal case where we fit in our buffer. 969 { 970 char dst[10]; 971 wchar_t wdst[10]; 972 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 973 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 974 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 975 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 976 } 977 978 // Test dst_size == 0, nothing should be written to |dst| and we should 979 // have the equivalent of strlen(src). 980 { 981 char dst[2] = {1, 2}; 982 wchar_t wdst[2] = {1, 2}; 983 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); 984 EXPECT_EQ(1, dst[0]); 985 EXPECT_EQ(2, dst[1]); 986 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); 987 #if defined(WCHAR_T_IS_UNSIGNED) 988 EXPECT_EQ(1U, wdst[0]); 989 EXPECT_EQ(2U, wdst[1]); 990 #else 991 EXPECT_EQ(1, wdst[0]); 992 EXPECT_EQ(2, wdst[1]); 993 #endif 994 } 995 996 // Test the case were we _just_ competely fit including the null. 997 { 998 char dst[8]; 999 wchar_t wdst[8]; 1000 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1001 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1002 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1003 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1004 } 1005 1006 // Test the case were we we are one smaller, so we can't fit the null. 1007 { 1008 char dst[7]; 1009 wchar_t wdst[7]; 1010 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1011 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 1012 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1013 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 1014 } 1015 1016 // Test the case were we are just too small. 1017 { 1018 char dst[3]; 1019 wchar_t wdst[3]; 1020 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1021 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 1022 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1023 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 1024 } 1025 } 1026 1027 TEST(StringUtilTest, WprintfFormatPortabilityTest) { 1028 struct TestData { 1029 const wchar_t* input; 1030 bool portable; 1031 } cases[] = { 1032 { L"%ls", true }, 1033 { L"%s", false }, 1034 { L"%S", false }, 1035 { L"%lS", false }, 1036 { L"Hello, %s", false }, 1037 { L"%lc", true }, 1038 { L"%c", false }, 1039 { L"%C", false }, 1040 { L"%lC", false }, 1041 { L"%ls %s", false }, 1042 { L"%s %ls", false }, 1043 { L"%s %ls %s", false }, 1044 { L"%f", true }, 1045 { L"%f %F", false }, 1046 { L"%d %D", false }, 1047 { L"%o %O", false }, 1048 { L"%u %U", false }, 1049 { L"%f %d %o %u", true }, 1050 { L"%-8d (%02.1f%)", true }, 1051 { L"% 10s", false }, 1052 { L"% 10ls", true } 1053 }; 1054 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1055 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); 1056 } 1057 } 1058 1059 TEST(StringUtilTest, RemoveChars) { 1060 const char* kRemoveChars = "-/+*"; 1061 std::string input = "A-+bc/d!*"; 1062 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); 1063 EXPECT_EQ("Abcd!", input); 1064 1065 // No characters match kRemoveChars. 1066 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1067 EXPECT_EQ("Abcd!", input); 1068 1069 // Empty string. 1070 input.clear(); 1071 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1072 EXPECT_EQ(std::string(), input); 1073 } 1074 1075 TEST(StringUtilTest, ContainsOnlyChars) { 1076 // Providing an empty list of characters should return false but for the empty 1077 // string. 1078 EXPECT_TRUE(ContainsOnlyChars("", "")); 1079 EXPECT_FALSE(ContainsOnlyChars("Hello", "")); 1080 1081 EXPECT_TRUE(ContainsOnlyChars("", "1234")); 1082 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1083 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1084 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1085 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1086 } 1087 1088 } // namespace base 1089