1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <math.h> 8 #include <stdarg.h> 9 10 #include <algorithm> 11 12 #include "base/basictypes.h" 13 #include "base/strings/string16.h" 14 #include "base/strings/utf_string_conversions.h" 15 #include "testing/gmock/include/gmock/gmock.h" 16 #include "testing/gtest/include/gtest/gtest.h" 17 18 using ::testing::ElementsAre; 19 20 namespace base { 21 22 static const struct trim_case { 23 const wchar_t* input; 24 const TrimPositions positions; 25 const wchar_t* output; 26 const TrimPositions return_value; 27 } trim_cases[] = { 28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 32 {L"", TRIM_ALL, L"", TRIM_NONE}, 33 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 35 {L" ", TRIM_ALL, L"", TRIM_ALL}, 36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 38 }; 39 40 static const struct trim_case_ascii { 41 const char* input; 42 const TrimPositions positions; 43 const char* output; 44 const TrimPositions return_value; 45 } trim_cases_ascii[] = { 46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 50 {"", TRIM_ALL, "", TRIM_NONE}, 51 {" ", TRIM_LEADING, "", TRIM_LEADING}, 52 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 53 {" ", TRIM_ALL, "", TRIM_ALL}, 54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 55 }; 56 57 namespace { 58 59 // Helper used to test TruncateUTF8ToByteSize. 60 bool Truncated(const std::string& input, 61 const size_t byte_size, 62 std::string* output) { 63 size_t prev = input.length(); 64 TruncateUTF8ToByteSize(input, byte_size, output); 65 return prev != output->length(); 66 } 67 68 } // namespace 69 70 TEST(StringUtilTest, TruncateUTF8ToByteSize) { 71 std::string output; 72 73 // Empty strings and invalid byte_size arguments 74 EXPECT_FALSE(Truncated(std::string(), 0, &output)); 75 EXPECT_EQ(output, ""); 76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); 77 EXPECT_EQ(output, ""); 78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output)); 79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); 80 81 // Testing the truncation of valid UTF8 correctly 82 EXPECT_TRUE(Truncated("abc", 2, &output)); 83 EXPECT_EQ(output, "ab"); 84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); 85 EXPECT_EQ(output.compare("\xc2\x81"), 0); 86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); 87 EXPECT_EQ(output.compare("\xc2\x81"), 0); 88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); 89 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); 90 91 { 92 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; 93 const std::string array_string(array, arraysize(array)); 94 EXPECT_TRUE(Truncated(array_string, 4, &output)); 95 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); 96 } 97 98 { 99 const char array[] = "\x00\xc2\x81\xc2\x81"; 100 const std::string array_string(array, arraysize(array)); 101 EXPECT_TRUE(Truncated(array_string, 4, &output)); 102 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); 103 } 104 105 // Testing invalid UTF8 106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); 107 EXPECT_EQ(output.compare(""), 0); 108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); 109 EXPECT_EQ(output.compare(""), 0); 110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); 111 EXPECT_EQ(output.compare(""), 0); 112 113 // Testing invalid UTF8 mixed with valid UTF8 114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); 115 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); 116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); 117 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); 118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 119 10, &output)); 120 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); 121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 122 10, &output)); 123 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); 124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); 125 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); 126 127 // Overlong sequences 128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); 129 EXPECT_EQ(output.compare(""), 0); 130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); 131 EXPECT_EQ(output.compare(""), 0); 132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); 133 EXPECT_EQ(output.compare(""), 0); 134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); 135 EXPECT_EQ(output.compare(""), 0); 136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); 137 EXPECT_EQ(output.compare(""), 0); 138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); 139 EXPECT_EQ(output.compare(""), 0); 140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); 141 EXPECT_EQ(output.compare(""), 0); 142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); 143 EXPECT_EQ(output.compare(""), 0); 144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); 145 EXPECT_EQ(output.compare(""), 0); 146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); 147 EXPECT_EQ(output.compare(""), 0); 148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); 149 EXPECT_EQ(output.compare(""), 0); 150 151 // Beyond U+10FFFF (the upper limit of Unicode codespace) 152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); 153 EXPECT_EQ(output.compare(""), 0); 154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); 155 EXPECT_EQ(output.compare(""), 0); 156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); 157 EXPECT_EQ(output.compare(""), 0); 158 159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); 161 EXPECT_EQ(output.compare(""), 0); 162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); 163 EXPECT_EQ(output.compare(""), 0); 164 165 { 166 const char array[] = "\x00\x00\xfe\xff"; 167 const std::string array_string(array, arraysize(array)); 168 EXPECT_TRUE(Truncated(array_string, 4, &output)); 169 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); 170 } 171 172 // Variants on the previous test 173 { 174 const char array[] = "\xff\xfe\x00\x00"; 175 const std::string array_string(array, 4); 176 EXPECT_FALSE(Truncated(array_string, 4, &output)); 177 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); 178 } 179 { 180 const char array[] = "\xff\x00\x00\xfe"; 181 const std::string array_string(array, arraysize(array)); 182 EXPECT_TRUE(Truncated(array_string, 4, &output)); 183 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); 184 } 185 186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); 188 EXPECT_EQ(output.compare(""), 0); 189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); 190 EXPECT_EQ(output.compare(""), 0); 191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); 192 EXPECT_EQ(output.compare(""), 0); 193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); 194 EXPECT_EQ(output.compare(""), 0); 195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); 196 EXPECT_EQ(output.compare(""), 0); 197 198 // Strings in legacy encodings that are valid in UTF-8, but 199 // are invalid as UTF-8 in real data. 200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); 201 EXPECT_EQ(output.compare("caf"), 0); 202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); 203 EXPECT_EQ(output.compare(""), 0); 204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); 205 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, 207 &output)); 208 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 209 210 // Testing using the same string as input and output. 211 EXPECT_FALSE(Truncated(output, 4, &output)); 212 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 213 EXPECT_TRUE(Truncated(output, 3, &output)); 214 EXPECT_EQ(output.compare("\xa7\x41"), 0); 215 216 // "abc" with U+201[CD] in windows-125[0-8] 217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); 218 EXPECT_EQ(output.compare("\x93" "abc"), 0); 219 220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); 222 EXPECT_EQ(output.compare(""), 0); 223 224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); 226 EXPECT_EQ(output.compare(""), 0); 227 } 228 229 TEST(StringUtilTest, TrimWhitespace) { 230 string16 output; // Allow contents to carry over to next testcase 231 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 232 const trim_case& value = trim_cases[i]; 233 EXPECT_EQ(value.return_value, 234 TrimWhitespace(WideToUTF16(value.input), value.positions, 235 &output)); 236 EXPECT_EQ(WideToUTF16(value.output), output); 237 } 238 239 // Test that TrimWhitespace() can take the same string for input and output 240 output = ASCIIToUTF16(" This is a test \r\n"); 241 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output); 243 244 // Once more, but with a string of whitespace 245 output = ASCIIToUTF16(" \r\n"); 246 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 247 EXPECT_EQ(string16(), output); 248 249 std::string output_ascii; 250 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 251 const trim_case_ascii& value = trim_cases_ascii[i]; 252 EXPECT_EQ(value.return_value, 253 TrimWhitespace(value.input, value.positions, &output_ascii)); 254 EXPECT_EQ(value.output, output_ascii); 255 } 256 } 257 258 static const struct collapse_case { 259 const wchar_t* input; 260 const bool trim; 261 const wchar_t* output; 262 } collapse_cases[] = { 263 {L" Google Video ", false, L"Google Video"}, 264 {L"Google Video", false, L"Google Video"}, 265 {L"", false, L""}, 266 {L" ", false, L""}, 267 {L"\t\rTest String\n", false, L"Test String"}, 268 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 269 {L" Test \n \t String ", false, L"Test String"}, 270 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 271 {L" Test String", false, L"Test String"}, 272 {L"Test String ", false, L"Test String"}, 273 {L"Test String", false, L"Test String"}, 274 {L"", true, L""}, 275 {L"\n", true, L""}, 276 {L" \r ", true, L""}, 277 {L"\nFoo", true, L"Foo"}, 278 {L"\r Foo ", true, L"Foo"}, 279 {L" Foo bar ", true, L"Foo bar"}, 280 {L" \tFoo bar \n", true, L"Foo bar"}, 281 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 282 }; 283 284 TEST(StringUtilTest, CollapseWhitespace) { 285 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 286 const collapse_case& value = collapse_cases[i]; 287 EXPECT_EQ(WideToUTF16(value.output), 288 CollapseWhitespace(WideToUTF16(value.input), value.trim)); 289 } 290 } 291 292 static const struct collapse_case_ascii { 293 const char* input; 294 const bool trim; 295 const char* output; 296 } collapse_cases_ascii[] = { 297 {" Google Video ", false, "Google Video"}, 298 {"Google Video", false, "Google Video"}, 299 {"", false, ""}, 300 {" ", false, ""}, 301 {"\t\rTest String\n", false, "Test String"}, 302 {" Test \n \t String ", false, "Test String"}, 303 {" Test String", false, "Test String"}, 304 {"Test String ", false, "Test String"}, 305 {"Test String", false, "Test String"}, 306 {"", true, ""}, 307 {"\n", true, ""}, 308 {" \r ", true, ""}, 309 {"\nFoo", true, "Foo"}, 310 {"\r Foo ", true, "Foo"}, 311 {" Foo bar ", true, "Foo bar"}, 312 {" \tFoo bar \n", true, "Foo bar"}, 313 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 314 }; 315 316 TEST(StringUtilTest, CollapseWhitespaceASCII) { 317 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 318 const collapse_case_ascii& value = collapse_cases_ascii[i]; 319 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 320 } 321 } 322 323 TEST(StringUtilTest, IsStringUTF8) { 324 EXPECT_TRUE(IsStringUTF8("abc")); 325 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 330 331 // surrogate code points 332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 335 336 // overlong sequences 337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 348 349 // Beyond U+10FFFF (the upper limit of Unicode codespace) 350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 353 354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 355 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 356 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 359 360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 366 // Strings in legacy encodings. We can certainly make up strings 367 // in a legacy encoding that are valid in UTF-8, but in real data, 368 // most of them are invalid as UTF-8. 369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 372 // "abc" with U+201[CD] in windows-125[0-8] 373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 378 379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 380 // representation, and the second uses a 2-byte sequence. The second version 381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 382 // given codepoint must be used. 383 static const char kEmbeddedNull[] = "embedded\0null"; 384 EXPECT_TRUE(IsStringUTF8( 385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 387 } 388 389 TEST(StringUtilTest, ConvertASCII) { 390 static const char* char_cases[] = { 391 "Google Video", 392 "Hello, world\n", 393 "0123ABCDwxyz \a\b\t\r\n!+,.~" 394 }; 395 396 static const wchar_t* const wchar_cases[] = { 397 L"Google Video", 398 L"Hello, world\n", 399 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 400 }; 401 402 for (size_t i = 0; i < arraysize(char_cases); ++i) { 403 EXPECT_TRUE(IsStringASCII(char_cases[i])); 404 string16 utf16 = ASCIIToUTF16(char_cases[i]); 405 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16); 406 407 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i])); 408 EXPECT_EQ(char_cases[i], ascii); 409 } 410 411 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 412 413 // Convert empty strings. 414 string16 empty16; 415 std::string empty; 416 EXPECT_EQ(empty, UTF16ToASCII(empty16)); 417 EXPECT_EQ(empty16, ASCIIToUTF16(empty)); 418 419 // Convert strings with an embedded NUL character. 420 const char chars_with_nul[] = "test\0string"; 421 const int length_with_nul = arraysize(chars_with_nul) - 1; 422 std::string string_with_nul(chars_with_nul, length_with_nul); 423 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); 424 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), 425 wide_with_nul.length()); 426 std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul)); 427 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 428 narrow_with_nul.length()); 429 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 430 } 431 432 TEST(StringUtilTest, ToUpperASCII) { 433 EXPECT_EQ('C', ToUpperASCII('C')); 434 EXPECT_EQ('C', ToUpperASCII('c')); 435 EXPECT_EQ('2', ToUpperASCII('2')); 436 437 EXPECT_EQ(L'C', ToUpperASCII(L'C')); 438 EXPECT_EQ(L'C', ToUpperASCII(L'c')); 439 EXPECT_EQ(L'2', ToUpperASCII(L'2')); 440 441 std::string in_place_a("Cc2"); 442 StringToUpperASCII(&in_place_a); 443 EXPECT_EQ("CC2", in_place_a); 444 445 std::wstring in_place_w(L"Cc2"); 446 StringToUpperASCII(&in_place_w); 447 EXPECT_EQ(L"CC2", in_place_w); 448 449 std::string original_a("Cc2"); 450 std::string upper_a = StringToUpperASCII(original_a); 451 EXPECT_EQ("CC2", upper_a); 452 453 std::wstring original_w(L"Cc2"); 454 std::wstring upper_w = StringToUpperASCII(original_w); 455 EXPECT_EQ(L"CC2", upper_w); 456 } 457 458 TEST(StringUtilTest, LowerCaseEqualsASCII) { 459 static const struct { 460 const char* src_a; 461 const char* dst; 462 } lowercase_cases[] = { 463 { "FoO", "foo" }, 464 { "foo", "foo" }, 465 { "FOO", "foo" }, 466 }; 467 468 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { 469 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a), 470 lowercase_cases[i].dst)); 471 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 472 lowercase_cases[i].dst)); 473 } 474 } 475 476 TEST(StringUtilTest, FormatBytesUnlocalized) { 477 static const struct { 478 int64 bytes; 479 const char* expected; 480 } cases[] = { 481 // Expected behavior: we show one post-decimal digit when we have 482 // under two pre-decimal digits, except in cases where it makes no 483 // sense (zero or bytes). 484 // Since we switch units once we cross the 1000 mark, this keeps 485 // the display of file sizes or bytes consistently around three 486 // digits. 487 {0, "0 B"}, 488 {512, "512 B"}, 489 {1024*1024, "1.0 MB"}, 490 {1024*1024*1024, "1.0 GB"}, 491 {10LL*1024*1024*1024, "10.0 GB"}, 492 {99LL*1024*1024*1024, "99.0 GB"}, 493 {105LL*1024*1024*1024, "105 GB"}, 494 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, 495 {~(1LL<<63), "8192 PB"}, 496 497 {99*1024 + 103, "99.1 kB"}, 498 {1024*1024 + 103, "1.0 MB"}, 499 {1024*1024 + 205 * 1024, "1.2 MB"}, 500 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, 501 {10LL*1024*1024*1024, "10.0 GB"}, 502 {100LL*1024*1024*1024, "100 GB"}, 503 }; 504 505 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 506 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), 507 FormatBytesUnlocalized(cases[i].bytes)); 508 } 509 } 510 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 511 static const struct { 512 const char* str; 513 string16::size_type start_offset; 514 const char* find_this; 515 const char* replace_with; 516 const char* expected; 517 } cases[] = { 518 {"aaa", 0, "a", "b", "bbb"}, 519 {"abb", 0, "ab", "a", "ab"}, 520 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 521 {"Not found", 0, "x", "0", "Not found"}, 522 {"Not found again", 5, "x", "0", "Not found again"}, 523 {" Making it much longer ", 0, " ", "Four score and seven years ago", 524 "Four score and seven years agoMakingFour score and seven years agoit" 525 "Four score and seven years agomuchFour score and seven years agolonger" 526 "Four score and seven years ago"}, 527 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 528 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 529 {"abababab", 2, "ab", "c", "abccc"}, 530 }; 531 532 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 533 string16 str = ASCIIToUTF16(cases[i].str); 534 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 535 ASCIIToUTF16(cases[i].find_this), 536 ASCIIToUTF16(cases[i].replace_with)); 537 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 538 } 539 } 540 541 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 542 static const struct { 543 const char* str; 544 string16::size_type start_offset; 545 const char* find_this; 546 const char* replace_with; 547 const char* expected; 548 } cases[] = { 549 {"aaa", 0, "a", "b", "baa"}, 550 {"abb", 0, "ab", "a", "ab"}, 551 {"Removing some substrings inging", 0, "ing", "", 552 "Remov some substrings inging"}, 553 {"Not found", 0, "x", "0", "Not found"}, 554 {"Not found again", 5, "x", "0", "Not found again"}, 555 {" Making it much longer ", 0, " ", "Four score and seven years ago", 556 "Four score and seven years agoMaking it much longer "}, 557 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 558 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 559 {"abababab", 2, "ab", "c", "abcabab"}, 560 }; 561 562 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 563 string16 str = ASCIIToUTF16(cases[i].str); 564 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 565 ASCIIToUTF16(cases[i].find_this), 566 ASCIIToUTF16(cases[i].replace_with)); 567 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 568 } 569 } 570 571 TEST(StringUtilTest, HexDigitToInt) { 572 EXPECT_EQ(0, HexDigitToInt('0')); 573 EXPECT_EQ(1, HexDigitToInt('1')); 574 EXPECT_EQ(2, HexDigitToInt('2')); 575 EXPECT_EQ(3, HexDigitToInt('3')); 576 EXPECT_EQ(4, HexDigitToInt('4')); 577 EXPECT_EQ(5, HexDigitToInt('5')); 578 EXPECT_EQ(6, HexDigitToInt('6')); 579 EXPECT_EQ(7, HexDigitToInt('7')); 580 EXPECT_EQ(8, HexDigitToInt('8')); 581 EXPECT_EQ(9, HexDigitToInt('9')); 582 EXPECT_EQ(10, HexDigitToInt('A')); 583 EXPECT_EQ(11, HexDigitToInt('B')); 584 EXPECT_EQ(12, HexDigitToInt('C')); 585 EXPECT_EQ(13, HexDigitToInt('D')); 586 EXPECT_EQ(14, HexDigitToInt('E')); 587 EXPECT_EQ(15, HexDigitToInt('F')); 588 589 // Verify the lower case as well. 590 EXPECT_EQ(10, HexDigitToInt('a')); 591 EXPECT_EQ(11, HexDigitToInt('b')); 592 EXPECT_EQ(12, HexDigitToInt('c')); 593 EXPECT_EQ(13, HexDigitToInt('d')); 594 EXPECT_EQ(14, HexDigitToInt('e')); 595 EXPECT_EQ(15, HexDigitToInt('f')); 596 } 597 598 // This checks where we can use the assignment operator for a va_list. We need 599 // a way to do this since Visual C doesn't support va_copy, but assignment on 600 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this 601 // capability. 602 static void VariableArgsFunc(const char* format, ...) { 603 va_list org; 604 va_start(org, format); 605 606 va_list dup; 607 GG_VA_COPY(dup, org); 608 int i1 = va_arg(org, int); 609 int j1 = va_arg(org, int); 610 char* s1 = va_arg(org, char*); 611 double d1 = va_arg(org, double); 612 va_end(org); 613 614 int i2 = va_arg(dup, int); 615 int j2 = va_arg(dup, int); 616 char* s2 = va_arg(dup, char*); 617 double d2 = va_arg(dup, double); 618 619 EXPECT_EQ(i1, i2); 620 EXPECT_EQ(j1, j2); 621 EXPECT_STREQ(s1, s2); 622 EXPECT_EQ(d1, d2); 623 624 va_end(dup); 625 } 626 627 TEST(StringUtilTest, VAList) { 628 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); 629 } 630 631 // Test for Tokenize 632 template <typename STR> 633 void TokenizeTest() { 634 std::vector<STR> r; 635 size_t size; 636 637 size = Tokenize(STR("This is a string"), STR(" "), &r); 638 EXPECT_EQ(4U, size); 639 ASSERT_EQ(4U, r.size()); 640 EXPECT_EQ(r[0], STR("This")); 641 EXPECT_EQ(r[1], STR("is")); 642 EXPECT_EQ(r[2], STR("a")); 643 EXPECT_EQ(r[3], STR("string")); 644 r.clear(); 645 646 size = Tokenize(STR("one,two,three"), STR(","), &r); 647 EXPECT_EQ(3U, size); 648 ASSERT_EQ(3U, r.size()); 649 EXPECT_EQ(r[0], STR("one")); 650 EXPECT_EQ(r[1], STR("two")); 651 EXPECT_EQ(r[2], STR("three")); 652 r.clear(); 653 654 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r); 655 EXPECT_EQ(3U, size); 656 ASSERT_EQ(3U, r.size()); 657 EXPECT_EQ(r[0], STR("one")); 658 EXPECT_EQ(r[1], STR("two")); 659 EXPECT_EQ(r[2], STR("three;four")); 660 r.clear(); 661 662 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r); 663 EXPECT_EQ(4U, size); 664 ASSERT_EQ(4U, r.size()); 665 EXPECT_EQ(r[0], STR("one")); 666 EXPECT_EQ(r[1], STR("two")); 667 EXPECT_EQ(r[2], STR("three")); 668 EXPECT_EQ(r[3], STR("four")); 669 r.clear(); 670 671 size = Tokenize(STR("one, two, three"), STR(","), &r); 672 EXPECT_EQ(3U, size); 673 ASSERT_EQ(3U, r.size()); 674 EXPECT_EQ(r[0], STR("one")); 675 EXPECT_EQ(r[1], STR(" two")); 676 EXPECT_EQ(r[2], STR(" three")); 677 r.clear(); 678 679 size = Tokenize(STR("one, two, three, "), STR(","), &r); 680 EXPECT_EQ(4U, size); 681 ASSERT_EQ(4U, r.size()); 682 EXPECT_EQ(r[0], STR("one")); 683 EXPECT_EQ(r[1], STR(" two")); 684 EXPECT_EQ(r[2], STR(" three")); 685 EXPECT_EQ(r[3], STR(" ")); 686 r.clear(); 687 688 size = Tokenize(STR("one, two, three,"), STR(","), &r); 689 EXPECT_EQ(3U, size); 690 ASSERT_EQ(3U, r.size()); 691 EXPECT_EQ(r[0], STR("one")); 692 EXPECT_EQ(r[1], STR(" two")); 693 EXPECT_EQ(r[2], STR(" three")); 694 r.clear(); 695 696 size = Tokenize(STR(), STR(","), &r); 697 EXPECT_EQ(0U, size); 698 ASSERT_EQ(0U, r.size()); 699 r.clear(); 700 701 size = Tokenize(STR(","), STR(","), &r); 702 EXPECT_EQ(0U, size); 703 ASSERT_EQ(0U, r.size()); 704 r.clear(); 705 706 size = Tokenize(STR(",;:."), STR(".:;,"), &r); 707 EXPECT_EQ(0U, size); 708 ASSERT_EQ(0U, r.size()); 709 r.clear(); 710 711 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r); 712 EXPECT_EQ(1U, size); 713 ASSERT_EQ(1U, r.size()); 714 EXPECT_EQ(r[0], STR("a")); 715 r.clear(); 716 717 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r); 718 EXPECT_EQ(2U, size); 719 ASSERT_EQ(2U, r.size()); 720 EXPECT_EQ(r[0], STR("\ta\t")); 721 EXPECT_EQ(r[1], STR("b\tcc")); 722 r.clear(); 723 } 724 725 TEST(StringUtilTest, TokenizeStdString) { 726 TokenizeTest<std::string>(); 727 } 728 729 TEST(StringUtilTest, TokenizeStringPiece) { 730 TokenizeTest<base::StringPiece>(); 731 } 732 733 // Test for JoinString 734 TEST(StringUtilTest, JoinString) { 735 std::vector<std::string> in; 736 EXPECT_EQ("", JoinString(in, ',')); 737 738 in.push_back("a"); 739 EXPECT_EQ("a", JoinString(in, ',')); 740 741 in.push_back("b"); 742 in.push_back("c"); 743 EXPECT_EQ("a,b,c", JoinString(in, ',')); 744 745 in.push_back(std::string()); 746 EXPECT_EQ("a,b,c,", JoinString(in, ',')); 747 in.push_back(" "); 748 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); 749 } 750 751 // Test for JoinString overloaded with std::string separator 752 TEST(StringUtilTest, JoinStringWithString) { 753 std::string separator(", "); 754 std::vector<std::string> parts; 755 EXPECT_EQ(std::string(), JoinString(parts, separator)); 756 757 parts.push_back("a"); 758 EXPECT_EQ("a", JoinString(parts, separator)); 759 760 parts.push_back("b"); 761 parts.push_back("c"); 762 EXPECT_EQ("a, b, c", JoinString(parts, separator)); 763 764 parts.push_back(std::string()); 765 EXPECT_EQ("a, b, c, ", JoinString(parts, separator)); 766 parts.push_back(" "); 767 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|")); 768 } 769 770 // Test for JoinString overloaded with string16 separator 771 TEST(StringUtilTest, JoinStringWithString16) { 772 string16 separator = ASCIIToUTF16(", "); 773 std::vector<string16> parts; 774 EXPECT_EQ(string16(), JoinString(parts, separator)); 775 776 parts.push_back(ASCIIToUTF16("a")); 777 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); 778 779 parts.push_back(ASCIIToUTF16("b")); 780 parts.push_back(ASCIIToUTF16("c")); 781 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); 782 783 parts.push_back(ASCIIToUTF16("")); 784 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); 785 parts.push_back(ASCIIToUTF16(" ")); 786 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); 787 } 788 789 TEST(StringUtilTest, StartsWith) { 790 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); 791 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); 792 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); 793 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); 794 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); 795 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); 796 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false)); 797 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true)); 798 EXPECT_TRUE(StartsWithASCII("java", std::string(), false)); 799 EXPECT_TRUE(StartsWithASCII("java", std::string(), true)); 800 801 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), 802 ASCIIToUTF16("javascript"), true)); 803 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"), 804 ASCIIToUTF16("javascript"), true)); 805 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), 806 ASCIIToUTF16("javascript"), false)); 807 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"), 808 ASCIIToUTF16("javascript"), false)); 809 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), 810 ASCIIToUTF16("javascript"), true)); 811 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), 812 ASCIIToUTF16("javascript"), false)); 813 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false)); 814 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true)); 815 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false)); 816 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true)); 817 } 818 819 TEST(StringUtilTest, EndsWith) { 820 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), 821 ASCIIToUTF16(".plugin"), true)); 822 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), 823 ASCIIToUTF16(".plugin"), true)); 824 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), 825 ASCIIToUTF16(".plugin"), false)); 826 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), 827 ASCIIToUTF16(".plugin"), false)); 828 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true)); 829 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false)); 830 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), 831 ASCIIToUTF16(".plugin"), true)); 832 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), 833 ASCIIToUTF16(".plugin"), false)); 834 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false)); 835 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true)); 836 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false)); 837 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true)); 838 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), 839 ASCIIToUTF16(".plugin"), false)); 840 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true)); 841 EXPECT_TRUE(EndsWith(string16(), string16(), false)); 842 EXPECT_TRUE(EndsWith(string16(), string16(), true)); 843 } 844 845 TEST(StringUtilTest, GetStringFWithOffsets) { 846 std::vector<string16> subst; 847 subst.push_back(ASCIIToUTF16("1")); 848 subst.push_back(ASCIIToUTF16("2")); 849 std::vector<size_t> offsets; 850 851 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 852 subst, 853 &offsets); 854 EXPECT_EQ(2U, offsets.size()); 855 EXPECT_EQ(7U, offsets[0]); 856 EXPECT_EQ(25U, offsets[1]); 857 offsets.clear(); 858 859 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 860 subst, 861 &offsets); 862 EXPECT_EQ(2U, offsets.size()); 863 EXPECT_EQ(25U, offsets[0]); 864 EXPECT_EQ(7U, offsets[1]); 865 offsets.clear(); 866 } 867 868 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 869 // Test whether replacestringplaceholders works as expected when there 870 // are fewer inputs than outputs. 871 std::vector<string16> subst; 872 subst.push_back(ASCIIToUTF16("9a")); 873 subst.push_back(ASCIIToUTF16("8b")); 874 subst.push_back(ASCIIToUTF16("7c")); 875 876 string16 formatted = 877 ReplaceStringPlaceholders( 878 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); 879 880 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); 881 } 882 883 TEST(StringUtilTest, ReplaceStringPlaceholders) { 884 std::vector<string16> subst; 885 subst.push_back(ASCIIToUTF16("9a")); 886 subst.push_back(ASCIIToUTF16("8b")); 887 subst.push_back(ASCIIToUTF16("7c")); 888 subst.push_back(ASCIIToUTF16("6d")); 889 subst.push_back(ASCIIToUTF16("5e")); 890 subst.push_back(ASCIIToUTF16("4f")); 891 subst.push_back(ASCIIToUTF16("3g")); 892 subst.push_back(ASCIIToUTF16("2h")); 893 subst.push_back(ASCIIToUTF16("1i")); 894 895 string16 formatted = 896 ReplaceStringPlaceholders( 897 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); 898 899 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); 900 } 901 902 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) { 903 std::vector<string16> subst; 904 subst.push_back(ASCIIToUTF16("9a")); 905 subst.push_back(ASCIIToUTF16("8b")); 906 subst.push_back(ASCIIToUTF16("7c")); 907 subst.push_back(ASCIIToUTF16("6d")); 908 subst.push_back(ASCIIToUTF16("5e")); 909 subst.push_back(ASCIIToUTF16("4f")); 910 subst.push_back(ASCIIToUTF16("3g")); 911 subst.push_back(ASCIIToUTF16("2h")); 912 subst.push_back(ASCIIToUTF16("1i")); 913 subst.push_back(ASCIIToUTF16("0j")); 914 subst.push_back(ASCIIToUTF16("-1k")); 915 subst.push_back(ASCIIToUTF16("-2l")); 916 subst.push_back(ASCIIToUTF16("-3m")); 917 subst.push_back(ASCIIToUTF16("-4n")); 918 919 string16 formatted = 920 ReplaceStringPlaceholders( 921 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i," 922 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL); 923 924 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh," 925 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a")); 926 } 927 928 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 929 std::vector<std::string> subst; 930 subst.push_back("9a"); 931 subst.push_back("8b"); 932 subst.push_back("7c"); 933 subst.push_back("6d"); 934 subst.push_back("5e"); 935 subst.push_back("4f"); 936 subst.push_back("3g"); 937 subst.push_back("2h"); 938 subst.push_back("1i"); 939 940 std::string formatted = 941 ReplaceStringPlaceholders( 942 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); 943 944 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); 945 } 946 947 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { 948 std::vector<std::string> subst; 949 subst.push_back("a"); 950 subst.push_back("b"); 951 subst.push_back("c"); 952 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), 953 "$1 $$2 $$$3"); 954 } 955 956 TEST(StringUtilTest, MatchPatternTest) { 957 EXPECT_TRUE(MatchPattern("www.google.com", "*.com")); 958 EXPECT_TRUE(MatchPattern("www.google.com", "*")); 959 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org")); 960 EXPECT_TRUE(MatchPattern("Hello", "H?l?o")); 961 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)")); 962 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM")); 963 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*")); 964 EXPECT_FALSE(MatchPattern("", "*.*")); 965 EXPECT_TRUE(MatchPattern("", "*")); 966 EXPECT_TRUE(MatchPattern("", "?")); 967 EXPECT_TRUE(MatchPattern("", "")); 968 EXPECT_FALSE(MatchPattern("Hello", "")); 969 EXPECT_TRUE(MatchPattern("Hello*", "Hello*")); 970 // Stop after a certain recursion depth. 971 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*")); 972 973 // Test UTF8 matching. 974 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0")); 975 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?.")); 976 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*")); 977 // Invalid sequences should be handled as a single invalid character. 978 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?")); 979 // If the pattern has invalid characters, it shouldn't match anything. 980 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); 981 982 // Test UTF16 character matching. 983 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), 984 UTF8ToUTF16("*.com"))); 985 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), 986 UTF8ToUTF16("He??o\\*1*"))); 987 988 // This test verifies that consecutive wild cards are collapsed into 1 989 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum 990 // recursion depth). 991 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"), 992 UTF8ToUTF16("He********************************o"))); 993 } 994 995 TEST(StringUtilTest, LcpyTest) { 996 // Test the normal case where we fit in our buffer. 997 { 998 char dst[10]; 999 wchar_t wdst[10]; 1000 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1001 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1002 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1003 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1004 } 1005 1006 // Test dst_size == 0, nothing should be written to |dst| and we should 1007 // have the equivalent of strlen(src). 1008 { 1009 char dst[2] = {1, 2}; 1010 wchar_t wdst[2] = {1, 2}; 1011 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); 1012 EXPECT_EQ(1, dst[0]); 1013 EXPECT_EQ(2, dst[1]); 1014 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); 1015 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]); 1016 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]); 1017 } 1018 1019 // Test the case were we _just_ competely fit including the null. 1020 { 1021 char dst[8]; 1022 wchar_t wdst[8]; 1023 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1024 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1025 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1026 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1027 } 1028 1029 // Test the case were we we are one smaller, so we can't fit the null. 1030 { 1031 char dst[7]; 1032 wchar_t wdst[7]; 1033 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1034 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 1035 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1036 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 1037 } 1038 1039 // Test the case were we are just too small. 1040 { 1041 char dst[3]; 1042 wchar_t wdst[3]; 1043 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1044 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 1045 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1046 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 1047 } 1048 } 1049 1050 TEST(StringUtilTest, WprintfFormatPortabilityTest) { 1051 static const struct { 1052 const wchar_t* input; 1053 bool portable; 1054 } cases[] = { 1055 { L"%ls", true }, 1056 { L"%s", false }, 1057 { L"%S", false }, 1058 { L"%lS", false }, 1059 { L"Hello, %s", false }, 1060 { L"%lc", true }, 1061 { L"%c", false }, 1062 { L"%C", false }, 1063 { L"%lC", false }, 1064 { L"%ls %s", false }, 1065 { L"%s %ls", false }, 1066 { L"%s %ls %s", false }, 1067 { L"%f", true }, 1068 { L"%f %F", false }, 1069 { L"%d %D", false }, 1070 { L"%o %O", false }, 1071 { L"%u %U", false }, 1072 { L"%f %d %o %u", true }, 1073 { L"%-8d (%02.1f%)", true }, 1074 { L"% 10s", false }, 1075 { L"% 10ls", true } 1076 }; 1077 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 1078 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); 1079 } 1080 1081 TEST(StringUtilTest, RemoveChars) { 1082 const char* kRemoveChars = "-/+*"; 1083 std::string input = "A-+bc/d!*"; 1084 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); 1085 EXPECT_EQ("Abcd!", input); 1086 1087 // No characters match kRemoveChars. 1088 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1089 EXPECT_EQ("Abcd!", input); 1090 1091 // Empty string. 1092 input.clear(); 1093 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1094 EXPECT_EQ(std::string(), input); 1095 } 1096 1097 TEST(StringUtilTest, ReplaceChars) { 1098 struct TestData { 1099 const char* input; 1100 const char* replace_chars; 1101 const char* replace_with; 1102 const char* output; 1103 bool result; 1104 } cases[] = { 1105 { "", "", "", "", false }, 1106 { "test", "", "", "test", false }, 1107 { "test", "", "!", "test", false }, 1108 { "test", "z", "!", "test", false }, 1109 { "test", "e", "!", "t!st", true }, 1110 { "test", "e", "!?", "t!?st", true }, 1111 { "test", "ez", "!", "t!st", true }, 1112 { "test", "zed", "!?", "t!?st", true }, 1113 { "test", "t", "!?", "!?es!?", true }, 1114 { "test", "et", "!>", "!>!>s!>", true }, 1115 { "test", "zest", "!", "!!!!", true }, 1116 { "test", "szt", "!", "!e!!", true }, 1117 { "test", "t", "test", "testestest", true }, 1118 }; 1119 1120 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1121 std::string output; 1122 bool result = ReplaceChars(cases[i].input, 1123 cases[i].replace_chars, 1124 cases[i].replace_with, 1125 &output); 1126 EXPECT_EQ(cases[i].result, result); 1127 EXPECT_EQ(cases[i].output, output); 1128 } 1129 } 1130 1131 TEST(StringUtilTest, ContainsOnlyChars) { 1132 // Providing an empty list of characters should return false but for the empty 1133 // string. 1134 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string())); 1135 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string())); 1136 1137 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234")); 1138 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1139 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1140 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1141 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1142 1143 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII)); 1144 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII)); 1145 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII)); 1146 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII)); 1147 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII)); 1148 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII)); 1149 1150 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16)); 1151 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16)); 1152 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16)); 1153 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16)); 1154 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16)); 1155 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "), 1156 kWhitespaceUTF16)); 1157 } 1158 1159 class WriteIntoTest : public testing::Test { 1160 protected: 1161 static void WritesCorrectly(size_t num_chars) { 1162 std::string buffer; 1163 char kOriginal[] = "supercali"; 1164 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars); 1165 // Using std::string(buffer.c_str()) instead of |buffer| truncates the 1166 // string at the first \0. 1167 EXPECT_EQ(std::string(kOriginal, 1168 std::min(num_chars, arraysize(kOriginal) - 1)), 1169 std::string(buffer.c_str())); 1170 EXPECT_EQ(num_chars, buffer.size()); 1171 } 1172 }; 1173 1174 TEST_F(WriteIntoTest, WriteInto) { 1175 // Validate that WriteInto reserves enough space and 1176 // sizes a string correctly. 1177 WritesCorrectly(1); 1178 WritesCorrectly(2); 1179 WritesCorrectly(5000); 1180 1181 // Validate that WriteInto doesn't modify other strings 1182 // when using a Copy-on-Write implementation. 1183 const char kLive[] = "live"; 1184 const char kDead[] = "dead"; 1185 const std::string live = kLive; 1186 std::string dead = live; 1187 strncpy(WriteInto(&dead, 5), kDead, 4); 1188 EXPECT_EQ(kDead, dead); 1189 EXPECT_EQ(4u, dead.size()); 1190 EXPECT_EQ(kLive, live); 1191 EXPECT_EQ(4u, live.size()); 1192 } 1193 1194 } // namespace base 1195